1. Informações, Pacotes R e datasets


################################################################################
################################################################################
##############  (1) Informações iniciais, instalação pacotes R #################
##################   e criação dos dados de treino e teste   ###################
################################################################################
################################################################################

################################################################################
## 1.1 Informações iniciais ####################################################
################################################################################

################################################################################
### 1.1.1 Informações sobre a variável desfecho ________________________________
################################################################################

Nome_Variavel_Eixo_Y<-"HOMA-IR"

Unidade_Medida_Variavel_Eixo_Y <- ""

Limite_de_decisao_do_teste<-"2.7"

################################################################################
### 1.1.2 Informações sobre a amostragem treino e teste ________________________
################################################################################

Percentual_dos_dados_usado_como_treino<- "70"

################################################################################
## 1.2 Especificações da Qualidade Analítica ###################################
###### baseado nos componentes da Variação Biológica ###########################
################################################################################

################################################################################
### 1.2.1 Bias permitido baseado na variação biológica _________________________
################################################################################

Coeficiente_de_variação_biológico_individual________CVI<-""

Coeficiente_de_variação_biológico_grupo_____________CVG<-""

################################################################################
### 1.2.2 Bias permitido baseado na variação biológica _________________________
######################### Marque um x ##########################################
################################################################################

especificações_de_desempenho_otimo<-""

especificações_de_desempenho_desejavel<-""

especificações_de_desempenho_minimo<-""

################################################################################
############################ (CLICAR EM KNIT) ##################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################

################################################################################
## 1.3 Instalação Pacote R #####################################################
################################################################################



Pacotes_R <- c("rmarkdown","knitr","multimode","modeest","rstatix","univOutl",
               "readxl","utf8","pwr","forecast","MultNonParam","ggplot2","earth",
               "plotly","DT","kableExtra","dplyr","stats","segmented","caret",
               "refineR","SciViews","rcompanion","effectsize","car","moments",
               "ggpubr")

if(sum(as.numeric(!Pacotes_R %in% installed.packages())) != 0){
  instalador <- Pacotes_R[!Pacotes_R %in% installed.packages()]
  for(i in 1:length(instalador)) {
    install.packages(instalador, dependencies = TRUE)
    break()}
  sapply(Pacotes_R, require, character = TRUE)
  } else {
    sapply(Pacotes_R, require, character = TRUE)
  }
## Carregando pacotes exigidos: rmarkdown
## Carregando pacotes exigidos: knitr
## Carregando pacotes exigidos: multimode
## Carregando pacotes exigidos: modeest
## Carregando pacotes exigidos: rstatix
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
## Carregando pacotes exigidos: univOutl
## Carregando pacotes exigidos: robustbase
## Carregando pacotes exigidos: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Carregando pacotes exigidos: readxl
## Carregando pacotes exigidos: utf8
## Carregando pacotes exigidos: pwr
## Carregando pacotes exigidos: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Registered S3 method overwritten by 'forecast':
##   method          from  
##   predict.default statip
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:modeest':
## 
##     naive
## Carregando pacotes exigidos: MultNonParam
## Carregando pacotes exigidos: ICSNP
## Carregando pacotes exigidos: mvtnorm
## Carregando pacotes exigidos: ICS
## Carregando pacotes exigidos: ggplot2
## Carregando pacotes exigidos: earth
## Carregando pacotes exigidos: Formula
## Carregando pacotes exigidos: plotmo
## Carregando pacotes exigidos: plotrix
## Carregando pacotes exigidos: TeachingDemos
## 
## Attaching package: 'TeachingDemos'
## The following objects are masked from 'package:Hmisc':
## 
##     cnvrt.coords, subplot
## Carregando pacotes exigidos: plotly
## Registered S3 method overwritten by 'httr':
##   method         from  
##   print.response rmutil
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:TeachingDemos':
## 
##     subplot
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:Hmisc':
## 
##     subplot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Carregando pacotes exigidos: DT
## Carregando pacotes exigidos: kableExtra
## Carregando pacotes exigidos: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Carregando pacotes exigidos: segmented
## Carregando pacotes exigidos: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:rstatix':
## 
##     select
## The following object is masked from 'package:multimode':
## 
##     geyser
## Carregando pacotes exigidos: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## The following object is masked from 'package:forecast':
## 
##     getResponse
## Carregando pacotes exigidos: caret
## Carregando pacotes exigidos: lattice
## Carregando pacotes exigidos: refineR
## 
## Attaching package: 'refineR'
## The following object is masked from 'package:forecast':
## 
##     BoxCox
## Carregando pacotes exigidos: SciViews
## Carregando pacotes exigidos: rcompanion
## 
## Attaching package: 'rcompanion'
## The following object is masked from 'package:forecast':
## 
##     accuracy
## Carregando pacotes exigidos: effectsize
## Registered S3 method overwritten by 'parameters':
##   method         from  
##   predict.kmeans statip
## 
## Attaching package: 'effectsize'
## The following object is masked from 'package:rcompanion':
## 
##     phi
## The following objects are masked from 'package:rstatix':
## 
##     cohens_d, eta_squared
## Carregando pacotes exigidos: car
## Carregando pacotes exigidos: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## Carregando pacotes exigidos: moments
## 
## Attaching package: 'moments'
## The following object is masked from 'package:modeest':
## 
##     skewness
## Carregando pacotes exigidos: ggpubr
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:forecast':
## 
##     gghistogram
##    rmarkdown        knitr    multimode      modeest      rstatix     univOutl 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##       readxl         utf8          pwr     forecast MultNonParam      ggplot2 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##        earth       plotly           DT   kableExtra        dplyr        stats 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##    segmented        caret      refineR     SciViews   rcompanion   effectsize 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##          car      moments       ggpubr 
##         TRUE         TRUE         TRUE
library(utf8)
set.seed(210002859)
options(es.use_symbols = TRUE) 
options(scipen = 999)

################################################################################
## 1.4 Rótulo eixo X ###########################################################
################################################################################

Nome_Variavel_Eixo_X <- "Prolactina"
Unidade_Medida_Variavel_Eixo_X <- "ng/mL"

rotulo_eixo_x1<-Nome_Variavel_Eixo_X
rotulo_eixo_x2<-paste(Nome_Variavel_Eixo_X,
                     "(",Unidade_Medida_Variavel_Eixo_X,")")
rotulo_eixo_x<-ifelse(Unidade_Medida_Variavel_Eixo_X=="",
                      rotulo_eixo_x1,rotulo_eixo_x2)
rotulo_eixo_x_médio<-paste("Média dos resultados de",rotulo_eixo_x)

################################################################################
## 1.5 Rótulo eixo Y ###########################################################
################################################################################

rotulo_eixo_y1<-Nome_Variavel_Eixo_Y
rotulo_eixo_y2<-paste(Nome_Variavel_Eixo_Y,"(",Unidade_Medida_Variavel_Eixo_Y,")")
rotulo_eixo_y<-ifelse(Unidade_Medida_Variavel_Eixo_Y=="",rotulo_eixo_y1,rotulo_eixo_y2)
rotulo_eixo_y_médio<-paste("Média dos resultados de",rotulo_eixo_y)
rotulo_eixo_y_médio2<-paste("Média ",rotulo_eixo_y)

################################################################################
## 1.6 Nomeando, lendo e tabelando os resultados individuais e médios ##########
################################################################################

################################################################################
### 1.5.1 Resultados individuais _______________________________________________
################################################################################

base_de_dados_indiv<-read_excel("1_Dataset/dataset_individuais.xlsx")
## New names:
## • `` -> `...3`
## • `` -> `...4`
base_de_dados_indiv<-subset(base_de_dados_indiv, select = 1:2)

base_de_dados_indiv <- data.frame(base_de_dados_indiv,Grupo = "")

base_de_dados_indiv <- base_de_dados_indiv %>%
  dplyr::mutate(Grupo = ifelse(base_de_dados_indiv$eixo_x<7,"PRL<7",
                               ifelse(base_de_dados_indiv$eixo_x>=7 & 
                                        base_de_dados_indiv$eixo_x<25,"7<=PRL<25",
                                      ifelse(base_de_dados_indiv$eixo_x>=25 & 
                                        base_de_dados_indiv$eixo_x<100,"25<=PRL<100",
                                        "PRL>100"))))

colnames_indv <- c(rotulo_eixo_y1, rotulo_eixo_x1,"Faixa Prolactina")
colnames_indv <- enc2utf8(colnames_indv)

datatable(base_de_dados_indiv, class = "cell-border stripe",
          colnames = colnames_indv,
          caption = "Tabela 1. Resultado individuais.",
          filter = "top",
          escape = FALSE,
          options = list(
            searchHighlight = TRUE,
initComplete = JS("function(settings, json) {",
                              "$(this.api().table().header()).css({'background-color': '#0d47a1', 'color': '#fff'});",
                              "}"
)))
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
################################################################################
### 1.6.1 Resultados médios ____________________________________________________
################################################################################

base_de_dados<-read_excel("1_Dataset/dataset_medias.xlsx")
## New names:
## • `` -> `...3`
## • `` -> `...4`
base_de_dados<-subset(base_de_dados, select = 1:2)

base_de_dados <- base_de_dados %>%
  dplyr::mutate(Grupo = ifelse(base_de_dados$eixo_x<7,"PRL<7",
                        ifelse(base_de_dados$eixo_x>=7 & 
                        base_de_dados$eixo_x<25,"7<=PRL<25",
                        ifelse(base_de_dados$eixo_x>=25 & 
                        base_de_dados$eixo_x<100,"25<=PRL<100","PRL>100"))))

colnames <- c(rotulo_eixo_y_médio, rotulo_eixo_x_médio,"Faixa Prolactina")
colnames <- enc2utf8(colnames)

datatable(base_de_dados, class = "cell-border stripe",
          colnames = colnames,
          caption = "Tabela 2. Média dos Resultados.",
          filter = "top",
          escape = FALSE,
          options = list(
            searchHighlight = TRUE,
initComplete = JS("function(settings, json) {",
                              "$(this.api().table().header()).css({'background-color': '#0d47a1', 'color': '#fff'});",
                              "}"
)))
################################################################################
### 1.7 Dividindo em base de dados treino e teste ##############################
################################################################################

set.seed(210002859)

train_prop<-ifelse(Percentual_dos_dados_usado_como_treino=="",0.7,
            as.numeric(Percentual_dos_dados_usado_como_treino)/100)

amostra_treino <- sample(nrow(base_de_dados), replace=FALSE, 
                         size=train_prop*nrow(base_de_dados))

train_data <- base_de_dados[amostra_treino, ]
test_data <- base_de_dados[-amostra_treino, ]


2. Estatística descritiva

################################################################################
################################################################################
#######################  (2) Estatística descritiva ############################
################################################################################
################################################################################

library(utf8)
set.seed(210002859)

################################################################################
## 2.1 Tamanho amostral ########################################################
################################################################################

n_x<-round(length(base_de_dados_indiv$eixo_y),3)
n_y<-round(length(base_de_dados_indiv$eixo_x),3)

################################################################################
## 2.2 Medidas tendência central ###############################################
################################################################################

media_x<-round(mean(base_de_dados_indiv$eixo_x),3)
media_y<-round(mean(base_de_dados_indiv$eixo_y),3)

mediana_x<-round(median(base_de_dados_indiv$eixo_x),3)
mediana_y<-round(median(base_de_dados_indiv$eixo_y),3)

moda_x<-round(locmodes(base_de_dados_indiv$eixo_x,mod0=1)$locations,3)
## Warning in locmodes(base_de_dados_indiv$eixo_x, mod0 = 1): If the density
## function has an unbounded support, artificial modes may have been created in
## the tails
moda_y<-round(locmodes(base_de_dados_indiv$eixo_y,mod0=1)$locations,3)
## Warning in locmodes(base_de_dados_indiv$eixo_y, mod0 = 1): If the density
## function has an unbounded support, artificial modes may have been created in
## the tails
menor.valor_x<-round(min(base_de_dados_indiv$eixo_x),3)
menor.valor_y<-round(min(base_de_dados_indiv$eixo_y),3)

maior.valor_x<-round(max(base_de_dados_indiv$eixo_x),3)
maior.valor_y<-round(max(base_de_dados_indiv$eixo_y),3)

################################################################################
## 2.3 Medidas dispersão #######################################################
################################################################################

dp_x<-round(sd(base_de_dados_indiv$eixo_x),3)
dp_y<-round(sd(base_de_dados_indiv$eixo_y),3)

var_x<-round(var(base_de_dados_indiv$eixo_x),3)
var_y<-round(var(base_de_dados_indiv$eixo_y),3)

IQR_x<-round(IQR(base_de_dados_indiv$eixo_x),3)
IQR_y<-round(IQR(base_de_dados_indiv$eixo_y),3)

amplitude_x<-round(maior.valor_x-menor.valor_x,3)
amplitude_y<-round(maior.valor_y-menor.valor_y,3)

################################################################################
## 2.4 Tabelando medidas de Posição ############################################
################################################################################

Estatistica<-c("Tamanho amostral","Mínimo", "Moda","Média","Mediana","Máximo")
Estatistica_X<-c(n_x,menor.valor_x,moda_x,media_x,mediana_x,maior.valor_x)
Estatistica_Y<-c(n_y,menor.valor_y,moda_y,media_y,mediana_y,maior.valor_y)

Tabela3<- data.frame(Estatistica =Estatistica,
                     Estatistica_X=Estatistica_X,
                     Estatistica_Y=Estatistica_Y)

colnames_indv2 <- c("Estatísticas",rotulo_eixo_x1 ,rotulo_eixo_y1)
colnames_indv2 <- enc2utf8(colnames_indv2)

kable(Tabela3,
  col.names = colnames_indv2,align = "cc",
  caption = "Tabela 3. Medidas de Posição - Resultados individuais.") %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 3. Medidas de Posição - Resultados individuais.
Estatísticas Prolactina HOMA-IR
Tamanho amostral 65795.000 65795.000
Mínimo 0.300 0.100
Moda 10.306 2.444
Média 11.902 2.516
Mediana 9.100 2.000
Máximo 297.500 150.700
################################################################################
## 2.5 Tabelando medidas de disperssão #########################################
################################################################################

Estatistica2<-c("Tamanho amostral","Desvio-Padrão (DP)","Variância",
                "Intervalo Interquartil (IIQ)","Amplitude (Range)")
Estatistica_X2<-c(n_x,dp_x,var_x,IQR_x,amplitude_x)
Estatistica_Y2<-c(n_y,dp_y,var_y,IQR_y,amplitude_y)

Tabela4<- data.frame(Estatistica2 =Estatistica2,
                       Estatistica_X2=Estatistica_X2,
                       Estatistica_Y2=Estatistica_Y2)

kable(Tabela4,col.names = colnames_indv2,align = "cc",
      caption = "Tabela 4. Medidas de Dispersão  - Resultados individuais.") %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 4. Medidas de Dispersão - Resultados individuais.
Estatísticas Prolactina HOMA-IR
Tamanho amostral 65795.000 65795.000
Desvio-Padrão (DP) 11.499 2.530
Variância 132.222 6.399
Intervalo Interquartil (IIQ) 6.900 1.700
Amplitude (Range) 297.200 150.600
################################################################################
## 2.6 Histograma resultados eixo x ############################################
################################################################################

tabela.dados1<-as.data.frame(table(data=base_de_dados_indiv$eixo_x))
n.resultados.diff1<-length(tabela.dados1$data)
quebras.hist.1_medida_central<-round(1+(3.222*log(n_x)),digits=0)
quebras.hist.2_medida_central<-50
quebras.hist.3_medida_central<-ifelse(n.resultados.diff1<25,15,
                             ifelse(n.resultados.diff1<100,25,
                             ifelse(n_x<500,quebras.hist.1_medida_central,
                             quebras.hist.2_medida_central)))

hist_x <- ggplot(base_de_dados_indiv, aes(x = eixo_x)) +
  geom_histogram(color = "black", fill = "lightblue", aes(y = ..count..),
                 bins = quebras.hist.3_medida_central) +
  labs(x = rotulo_eixo_x, y = "Contagem") +
  scale_x_continuous(breaks = seq(min(base_de_dados_indiv$eixo_x), 
                                  max(base_de_dados_indiv$eixo_x), length.out = 6)) +
  theme(axis.line = element_line(colour = "black", size = 0.5),
        axis.title = element_text(size = 20),
        axis.text  = element_text(colour = "black", size = 18),
        plot.title = element_text(size = 14, face = "bold"),
        plot.background = element_rect(fill = "transparent"),
        panel.background = element_blank(),
        legend.position = "none")
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
hist_x
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggsave("4_Figuras/Fig.1_Histograma_PRL_individuais.png",
       plot = hist_x, device = "png")
## Saving 7 x 5 in image
################################################################################
## 2.7 Histograma resultados eixo y ############################################
################################################################################

tabela.dados2<-as.data.frame(table(data=base_de_dados_indiv$eixo_y))
n.resultados.diff2<-length(tabela.dados2$data)
quebras.hist.1_medida_disper<-round(1+(3.222*log(n_y)),digits=0)
quebras.hist.2_medida_disper<-50
quebras.hist.3_medida_disper<-ifelse(n.resultados.diff2<25,15,
                             ifelse(n.resultados.diff2<100,25,
                             ifelse(n_y<500,quebras.hist.1_medida_disper,
                             quebras.hist.2_medida_disper)))

hist_y <- ggplot(base_de_dados_indiv, aes(x = eixo_y)) +
  geom_histogram(color = "black", fill = "lightblue", aes(y = ..count..),
                 bins = quebras.hist.3_medida_disper) +
  labs(x = rotulo_eixo_y, y = "Contagem") +
  scale_x_continuous(breaks = seq(min(base_de_dados_indiv$eixo_y), 
                                  max(base_de_dados_indiv$eixo_y), length.out = 6)) +
  theme(axis.line = element_line(colour = "black", size = 0.5),
        axis.title = element_text(size = 20),
        axis.text  = element_text(colour = "black", size = 18),
        plot.title = element_text(size = 14, face = "bold"),
        plot.background = element_rect(fill = "transparent"),
        panel.background = element_blank(),
        legend.position = "none")
hist_y

ggsave("4_Figuras/Fig.2_Histograma_Variável_Desfecho_individuais.png",
       plot = hist_y, device = "png")
## Saving 7 x 5 in image
################################################################################
## 2.8 Tamanho amostral - resultados médios ####################################
################################################################################

n_x_medios<-round(length(base_de_dados$eixo_y),3)
n_y_medios<-round(length(base_de_dados$eixo_x),3)

################################################################################
## 2.9 Medidas tendência central - resultados médios ###########################
################################################################################

media_x_medios<-round(mean(base_de_dados$eixo_x),3)
media_y_medios<-round(mean(base_de_dados$eixo_y),3)

mediana_x_medios<-round(median(base_de_dados$eixo_x),3)
mediana_y_medios<-round(median(base_de_dados$eixo_y),3)

moda_x_medios<-round(locmodes(base_de_dados$eixo_x,mod0=1)$locations,3)
## Warning in locmodes(base_de_dados$eixo_x, mod0 = 1): If the density function
## has an unbounded support, artificial modes may have been created in the tails
moda_y_medios<-round(locmodes(base_de_dados$eixo_y,mod0=1)$locations,3)
## Warning in locmodes(base_de_dados$eixo_y, mod0 = 1): If the density function
## has an unbounded support, artificial modes may have been created in the tails
menor.valor_x_medios<-round(min(base_de_dados$eixo_x),3)
menor.valor_y_medios<-round(min(base_de_dados$eixo_y),3)

maior.valor_x_medios<-round(max(base_de_dados$eixo_x),3)
maior.valor_y_medios<-round(max(base_de_dados$eixo_y),3)

################################################################################
## 2.10 Medidas dispersão - resultados médios ##################################
################################################################################

dp_x_medios<-round(sd(base_de_dados$eixo_x),3)
dp_y_medios<-round(sd(base_de_dados$eixo_y),3)

var_x_medios<-round(var(base_de_dados$eixo_x),3)
var_y_medios<-round(var(base_de_dados$eixo_y),3)

IQR_x_medios<-round(IQR(base_de_dados$eixo_x),3)
IQR_y_medios<-round(IQR(base_de_dados$eixo_y),3)

amplitude_x_medios<-round(maior.valor_x_medios-menor.valor_x_medios,3)
amplitude_y_medios<-round(maior.valor_y_medios-menor.valor_y_medios,3)

################################################################################
## 2.11 Tabelando medidas de Posição  - resultados médios ######################
################################################################################

Estatistica_medios<-c("Tamanho amostral","Mínimo","Moda","Média",
                      "Mediana","Máximo")
Estatistica_X_medios<-c(n_x_medios,menor.valor_x_medios,moda_x_medios,
                        media_x_medios,mediana_x_medios,maior.valor_x_medios)
Estatistica_Y_medios<-c(n_y_medios,menor.valor_y_medios,moda_y_medios,
                        media_y_medios,mediana_y_medios,maior.valor_y_medios)

Tabela5<- data.frame(Estatistica =Estatistica_medios,
                     Estatistica_X=Estatistica_X_medios,
                     Estatistica_Y=Estatistica_Y_medios)

colnames_indv2 <- c("Estatísticas",rotulo_eixo_x1 ,rotulo_eixo_y1)
colnames_indv2 <- enc2utf8(colnames_indv2)

kable(Tabela5,
  col.names = colnames_indv2,align = "cc",
  caption = "Tabela 5. Medidas de Posição - resultados médios.") %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 5. Medidas de Posição - resultados médios.
Estatísticas Prolactina HOMA-IR
Tamanho amostral 106.000 106.000
Mínimo 2.312 1.967
Moda 15.858 2.294
Média 25.954 2.432
Mediana 15.671 2.369
Máximo 222.475 3.270
################################################################################
## 2.12 Tabelando medidas de disperssão - resultados médios ####################
################################################################################

Estatistica2_medios<-c("Tamanho amostral","Desvio-Padrão (DP)","Variância",
                "Intervalo Interquartil (IIQ)","Amplitude (Range)")
Estatistica_X2_medios<-c(n_x_medios,dp_x_medios,var_x_medios,IQR_x_medios,
                  amplitude_x_medios)
Estatistica_Y2_medios<-c(n_y_medios,dp_y_medios,var_y_medios,IQR_y_medios,
                  amplitude_y_medios)

Tabela6<- data.frame(Estatistica2 =Estatistica2_medios,
                       Estatistica_X2=Estatistica_X2_medios,
                       Estatistica_Y2=Estatistica_Y2_medios)

kable(Tabela6,col.names = colnames_indv2,align = "cc",
      caption = "Tabela 6. Medidas de Dispersão - resultados médios.") %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 6. Medidas de Dispersão - resultados médios.
Estatísticas Prolactina HOMA-IR
Tamanho amostral 106.000 106.000
Desvio-Padrão (DP) 34.708 0.261
Variância 1204.654 0.068
Intervalo Interquartil (IIQ) 16.719 0.326
Amplitude (Range) 220.163 1.303
################################################################################
## 2.13 Histograma resultados eixo x  - resultados médios ######################
################################################################################

tabela.dados3<-as.data.frame(table(data=base_de_dados$eixo_x))
n.resultados.diff1_medios<-length(tabela.dados3$data)
quebras.hist.1_medida_central_medios<-round(1+(3.222*log(n_x_medios)),digits=0)
quebras.hist.2_medida_central_medios<-50
quebras.hist.3_medida_central_medios<-ifelse(n.resultados.diff1_medios<25,15,
                                      ifelse(n.resultados.diff1_medios<100,25,
                                      ifelse(n_x_medios<500,
                                      quebras.hist.1_medida_central_medios,
                                      quebras.hist.2_medida_central_medios)))

hist_x_medios <- ggplot(base_de_dados, aes(x = eixo_x)) +
  geom_histogram(color = "black", fill = "lightblue", aes(y = ..count..),
                 bins = quebras.hist.3_medida_central_medios) +
  labs(x = rotulo_eixo_x_médio, y = "Contagem") +
  scale_x_continuous(breaks = seq(min(base_de_dados$eixo_x), max(base_de_dados$eixo_x), 
                                  length.out = 6),
                     labels = function(x) sprintf("%.2f", x)) +
  theme(axis.line = element_line(colour = "black", size = 0.5),
        axis.title = element_text(size = 20),
        axis.text  = element_text(colour = "black", size = 18),
        plot.title = element_text(size = 14, face = "bold"),
        plot.background = element_rect(fill = "transparent"),
        panel.background = element_blank(),
        legend.position = "none")
hist_x_medios

ggsave("4_Figuras/Fig.3_Histograma_PRL_medias.png",
       plot = hist_x_medios, device = "png")
## Saving 7 x 5 in image
################################################################################
## 2.14 Histograma resultados eixo y  - resultados médios ######################
################################################################################

tabela.dados4<-as.data.frame(table(data=base_de_dados$eixo_y))
n.resultados.diff2_medios<-length(tabela.dados4$data)
quebras.hist.1_medida_disper_medios<-round(1+(3.222*log(n_y_medios)),digits=0)
quebras.hist.2_medida_disper_medios<-50
quebras.hist.3_medida_disper_medios<-ifelse(n.resultados.diff2_medios<25,15,
                             ifelse(n.resultados.diff2_medios<100,25,
                             ifelse(n_y_medios<500,
                                    quebras.hist.1_medida_disper_medios,
                             quebras.hist.2_medida_disper_medios)))

hist_y_medios <- ggplot(base_de_dados, aes(x = eixo_y)) +
  geom_histogram(color = "black", fill = "lightblue", aes(y = ..count..),
                 bins = quebras.hist.3_medida_disper_medios) +
  labs(x = rotulo_eixo_y_médio, y = "Contagem") +
  scale_x_continuous(breaks = seq(min(base_de_dados$eixo_y), max(base_de_dados$eixo_y), length.out = 6),
                     labels = function(x) sprintf("%.2f", x)) +
  theme(axis.line = element_line(colour = "black", size = 0.5),
        axis.title = element_text(size = 20),
        axis.text  = element_text(colour = "black", size = 18),
        plot.title = element_text(size = 14, face = "bold"),
        plot.background = element_rect(fill = "transparent"),
        panel.background = element_blank(),
        legend.position = "none")
hist_y_medios

ggsave("4_Figuras/Fig.4_Histograma_Variável_Desfecho_medias.png",
       plot = hist_y_medios, device = "png")
## Saving 7 x 5 in image


3. Verificando presupostos

################################################################################
################################################################################
#######################  (3) Verificando presupostos ###########################
################################################################################
################################################################################

library(ggpubr)
library(moments)
library(car)
set.seed(210002859)

alpha <- 0.005

Platykurtic<-"A distribuição pode ser considerada platicúrtica."
Leptokurtic<-"A distribuição pode ser considerada leptocúrtica."
Mesokurtic<-"A distribuição pode ser considerada mesocúrtica."

lista.nomes.shape<-c("Coeficiente de curtose",
                      "Interpretação do coeficiente de curtose",
                      "Coeficiente de assimetria",
                      "Interpretando o resultado do coeficiente de assimetria")

################################################################################
## 3.1 Pressupostos dos resultados individuais #################################
################################################################################

################################################################################
### 3.1.1 Teste homogeneidade variâncias - resultados individuais ______________
################################################################################

base_de_dados_indiv$Grupo <- as.factor(base_de_dados_indiv$Grupo)
levene_result_indiv <-leveneTest(eixo_y ~ Grupo, data = base_de_dados_indiv)

levene_results_indiv_interpretacao<-ifelse(levene_result_indiv[1, 3] > alpha,
                    paste("Os grupos apresentam variâncias homogêneas (p =", 
                    round(levene_result_indiv[1, 3], 4), "alfa: > 0.005)."),
                    paste("Os grupos não apresentam variâncias homogêneas (p =", 
                    round(levene_result_indiv[1, 3], 4), "<= alfa: 0.005)."))

################################################################################
### 3.1.2 Tabela Teste homogeneidade variâncias - resultados individuais _______
################################################################################

nome_coluna_tabela7<-c("GL","Valor F", "p-valor")
Tabela7<-data.frame(levene_result_indiv)
Titulo_Tabela7<-"Tabela 7. Resultado do Teste de Homogeneidade das Variâncias (Levene) - resultados individuais:"

kable(Tabela7,col.names = nome_coluna_tabela7,
  align = "cc",caption = Titulo_Tabela7) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%  
  footnote(general_title = "Interpretação",
           general = levene_results_indiv_interpretacao)
Tabela 7. Resultado do Teste de Homogeneidade das Variâncias (Levene) - resultados individuais:
GL Valor F p-valor
group 3 63.76796 0
65791 NA NA
Interpretação
Os grupos não apresentam variâncias homogêneas (p = 0 <= alfa: 0.005).
################################################################################
### 3.2.3 Analise Curtose - resultados individuais - Prolactina ________________
################################################################################

curtose<-function(dataset) {
  require(moments)
  round(kurtosis(dataset),2)
}

PRL.indiv_Kurtosis<-curtose(base_de_dados_indiv$eixo_x)

PRL.indiv_conclusao.Kurtosis<-ifelse(PRL.indiv_Kurtosis>3.3,Leptokurtic,
                              ifelse(PRL.indiv_Kurtosis<2.7,
                                    Platykurtic,Mesokurtic))

PRL.indiv_analise.cor.Kurtosis<-ifelse(PRL.indiv_conclusao.Kurtosis==Mesokurtic,
                                "ok","nao ok")

################################################################################
### 3.2.4 Analise Assimetria - resultados individuais - Prolactina _____________
################################################################################

PRL.indiv_dp<-round(sd(base_de_dados_indiv$eixo_x),digits=3)
PRL.indiv_media<-round(mean(base_de_dados_indiv$eixo_x),digits=3)
PRL.indiv_mediana<-round(median(base_de_dados_indiv$eixo_x),digits=3)
PRL.indiv_moda<-locmodes(base_de_dados_indiv$eixo_x,mod0=1)
## Warning in locmodes(base_de_dados_indiv$eixo_x, mod0 = 1): If the density
## function has an unbounded support, artificial modes may have been created in
## the tails
PRL.indiv_moda<-round(PRL.indiv_moda$locations,digits=3)

PRL.indiv_neg.assimetria<-ifelse(PRL.indiv_moda>PRL.indiv_mediana & 
                                 PRL.indiv_moda>PRL.indiv_media & 
                                 PRL.indiv_mediana>PRL.indiv_media,"ok",
                                 "nao ok")

PRL.indiv_pos.assimetria<-ifelse(PRL.indiv_media>PRL.indiv_mediana & 
                                 PRL.indiv_media>PRL.indiv_moda & 
                                 PRL.indiv_mediana>PRL.indiv_moda,"ok",
                                 "nao ok")

PRL.indiv_conclusao.assimetria.1<-ifelse(PRL.indiv_pos.assimetria=="ok" & 
 PRL.indiv_neg.assimetria=="nao ok",
 "A relação empírica entre Média, Mediana e Moda é: Média > Mediana > Moda. ",
 ifelse(PRL.indiv_pos.assimetria=="nao ok" & PRL.indiv_neg.assimetria=="ok",
 "Relação empírica entre Média, Mediana e Moda: Moda > Mediana > Média.",""))

PRL.indiv_assimetria<-signif(round(skewness(base_de_dados_indiv$eixo_x),digits=2),4)

PRL.indiv_magnitude.assimetria<-ifelse(abs(PRL.indiv_assimetria)>1,
                              "Essa distribuição é muito assimétrica",
                              ifelse(abs(PRL.indiv_assimetria)>0.5 & 
                              abs(PRL.indiv_assimetria)<=1,
                              "Esta distribuição é assimétrica",
                              ifelse(abs(PRL.indiv_assimetria)>0.15 & 
                              abs(PRL.indiv_assimetria)<=0.5,
                              "A distribuição é moderadamente assimétrica",
                              "A distribuição é aproximadamente simétrica.")))

PRL.indiv_sinal.assimetria<-ifelse(PRL.indiv_assimetria >0.15," para a direita.",
                  ifelse(PRL.indiv_assimetria <(-0.15)," para a esquerda.",""))

PRL.indiv_conclusao.assimetria.2<-paste(PRL.indiv_magnitude.assimetria,
                                PRL.indiv_sinal.assimetria)

PRL.indiv_conclusao.assimetria<-paste(PRL.indiv_conclusao.assimetria.1,
                                PRL.indiv_conclusao.assimetria.2)

PRL.indiv_analise.cor.assimetria<-ifelse(PRL.indiv_magnitude.assimetria==
                          "Essa distribuição é muito distorcida",
                          "nao ok","ok")

################################################################################
### 3.2.5 Medidas tendencia central - resultados individuais - Prolactina ______
################################################################################

PRL.indiv_moda.texto<-paste("Moda: ",PRL.indiv_moda)
PRL.indiv_media.texto<-paste("Média: ",PRL.indiv_media)
PRL.indiv_mediana.texto<-paste("Mediana: ",PRL.indiv_mediana)

PRL.indiv_g1 = qplot(x="",y=base_de_dados_indiv$eixo_x,
                    data=data.frame(base_de_dados_indiv$eixo_x),xlab="",
                    geom="boxplot") +
                    labs(y=rotulo_eixo_x) + coord_flip() +
                    theme(legend.position="top",
                    legend.text  = element_text (color="black",size =11 ),
                    axis.title = element_text(size = 12),
                    axis.text  = element_text (color="black",size = 11 ))
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
PRL.indiv_g2 = qplot(x=base_de_dados_indiv$eixo_x,
  data=data.frame(base_de_dados_indiv$eixo_x),geom="density") + 
  labs(title=paste("Fig. 5. Density plot e Box-Plot (n = ",
  n_y," )."),x=rotulo_eixo_x,y="Densidade") +
  geom_density(color="sienna3",alpha = 0.6,fill="sienna1") +
  stat_function(fun = function(x) dnorm(x, mean = PRL.indiv_media,
  sd = PRL.indiv_dp),color = "black", size = 1.2) +
  geom_vline(aes(xintercept=PRL.indiv_moda,colour=PRL.indiv_moda.texto),
  linewidth =1.2,linetype="solid") +
  geom_vline(aes(xintercept = PRL.indiv_mediana,colour=PRL.indiv_mediana.texto),
  linewidth =1.2,linetype="solid") +
  geom_vline(aes(xintercept = PRL.indiv_media,colour=PRL.indiv_media.texto),
  linewidth =1.2,linetype="solid") +
  scale_colour_manual("",breaks = c(PRL.indiv_moda.texto,
                                    PRL.indiv_mediana.texto,
                                    PRL.indiv_media.texto),
                      values = c("red","springgreen4","blue2")) +
  theme(legend.position="top",
        axis.title = element_text(size = 12),
        legend.text  = element_text (color="black",size =11 ),
        axis.text  = element_text (color="black",size = 11 ),
        plot.title=element_text(size=10,face = "bold"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fig.5<-ggarrange(PRL.indiv_g2, PRL.indiv_g1, heights = c(2, 1), align = "v", 
                 ncol = 1, nrow = 2)
fig.5

ggsave("4_Figuras/Fig.5_Densidade_Box_Plot_PRL_individuais.png",
       plot = fig.5, device = "png")
## Saving 7 x 5 in image
################################################################################
### 3.2.6 Tabela shape - resultados individuais - Prolactina ___________________
################################################################################

PRL.indiv_lista.resultados.shape<-c(PRL.indiv_Kurtosis,
                            PRL.indiv_conclusao.Kurtosis,
                            PRL.indiv_assimetria,
                            PRL.indiv_conclusao.assimetria)

Tabela8<-data.frame(lista.nomes.shape,PRL.indiv_lista.resultados.shape)

kable(Tabela8,
      col.names = c("Parâmetros estatísticos","Resultados"),
      align = "cc",
      caption = paste("Tabela 8. Distribuição dos resultados individuais de",
                      rotulo_eixo_x1," (n =",n_x,").")) %>%
  kable_styling(full_width = FALSE,
      bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  row_spec(2,bold = TRUE,color = "black",
  background = ifelse(is.na(PRL.indiv_analise.cor.Kurtosis)==TRUE,"gold",
  ifelse(PRL.indiv_analise.cor.Kurtosis=="ok","palegreen","gold"))) %>%
  row_spec(4,bold = TRUE,color = "black",
  background = ifelse(is.na(PRL.indiv_analise.cor.assimetria)==TRUE,"gold",
  ifelse(PRL.indiv_analise.cor.assimetria=="ok","palegreen","gold"))) %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%
  footnote(general_title = "",general = "") %>%
  footnote(general_title = "",general = "Nota:") %>% 
  footnote(general_title = "",
  general = "Se a curtose estiver entre 2,7 e 3,3, a distribuição pode ser considerada mesocúrtica.") %>%  
  footnote(general_title = "",
           general = "Se a curtose for menor que 2,7, a distribuição é Platicúrtica.") %>%
  footnote(general_title = "",
           general = "Se a curtose for maior que 3,3, a distribuição é Leptocúrtica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria for menor que -1 ou maior que 1, a distribuição é altamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -1 e -0,5 ou entre 0,5 e 1, a distribuição é moderadamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -0,5 e -0,15 ou entre 0,15 e 0,5, a distribuição é moderadamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -0,15 e 0,15, a distribuição é aproximadamente simétrica.")
Tabela 8. Distribuição dos resultados individuais de Prolactina (n = 65795 ).
Parâmetros estatísticos Resultados
Coeficiente de curtose 107.95
Interpretação do coeficiente de curtose A distribuição pode ser considerada leptocúrtica.
Coeficiente de assimetria 7.73
Interpretando o resultado do coeficiente de assimetria Essa distribuição é muito assimétrica para a direita.
Nota:
Se a curtose estiver entre 2,7 e 3,3, a distribuição pode ser considerada mesocúrtica.
Se a curtose for menor que 2,7, a distribuição é Platicúrtica.
Se a curtose for maior que 3,3, a distribuição é Leptocúrtica.
Se a assimetria for menor que -1 ou maior que 1, a distribuição é altamente assimétrica.
Se a assimetria estiver entre -1 e -0,5 ou entre 0,5 e 1, a distribuição é moderadamente assimétrica.
Se a assimetria estiver entre -0,5 e -0,15 ou entre 0,15 e 0,5, a distribuição é moderadamente assimétrica.
Se a assimetria estiver entre -0,15 e 0,15, a distribuição é aproximadamente simétrica.
################################################################################
### 3.2.7 Analise Curtose - resultados individuais - Variavel Desfecho _________
################################################################################

curtose<-function(dataset) {
  require(moments)
  round(kurtosis(dataset),2)
}


Y.indiv_Kurtosis<-curtose(base_de_dados_indiv$eixo_y)

Y.indiv_conclusao.Kurtosis<-ifelse(Y.indiv_Kurtosis>3.3,Leptokurtic,
                            ifelse(Y.indiv_Kurtosis<2.7,
                            Platykurtic,Mesokurtic))

Y.indiv_analise.cor.Kurtosis<-ifelse(Y.indiv_conclusao.Kurtosis==Mesokurtic,
                              "ok","nao ok")

################################################################################
### 3.2.8 Analise Assimetria - resultados individuais - Variavel Desfecho ______
################################################################################

Y.indiv_dp<-round(sd(base_de_dados_indiv$eixo_y),digits=3)
Y.indiv_media<-round(mean(base_de_dados_indiv$eixo_y),digits=3)
Y.indiv_mediana<-round(median(base_de_dados_indiv$eixo_y),digits=3)
Y.indiv_moda<-locmodes(base_de_dados_indiv$eixo_y,mod0=1)
## Warning in locmodes(base_de_dados_indiv$eixo_y, mod0 = 1): If the density
## function has an unbounded support, artificial modes may have been created in
## the tails
Y.indiv_moda<-round(Y.indiv_moda$locations,digits=3)

Y.indiv_neg.assimetria<-ifelse(Y.indiv_moda>Y.indiv_mediana & 
                                 Y.indiv_moda>Y.indiv_media & 
                                 Y.indiv_mediana>Y.indiv_media,"ok",
                                 "nao ok")

Y.indiv_pos.assimetria<-ifelse(Y.indiv_media>Y.indiv_mediana & 
                                 Y.indiv_media>Y.indiv_moda & 
                                 Y.indiv_mediana>Y.indiv_moda,"ok","nao ok")

Y.indiv_conclusao.assimetria.1<-ifelse(Y.indiv_pos.assimetria=="ok" & 
 Y.indiv_neg.assimetria=="nao ok",
 "A relação empírica entre Média, Mediana e Moda é: Média > Mediana > Moda. ",
 ifelse(Y.indiv_pos.assimetria=="nao ok" & Y.indiv_neg.assimetria=="ok",
 "Relação empírica entre Média, Mediana e Moda: Moda > Mediana > Média.",""))

Y.indiv_assimetria<-signif(round(skewness(base_de_dados_indiv$eixo_y),digits=2),4)

Y.indiv_magnitude.assimetria<-ifelse(abs(Y.indiv_assimetria)>1,
                              "Essa distribuição é muito assimétrica",
                              ifelse(abs(Y.indiv_assimetria)>0.5 & 
                              abs(Y.indiv_assimetria)<=1,
                              "Esta distribuição é assimétrica",
                              ifelse(abs(Y.indiv_assimetria)>0.15 & 
                              abs(Y.indiv_assimetria)<=0.5,
                              "A distribuição é moderadamente assimétrica",
                              "A distribuição é aproximadamente simétrica.")))

Y.indiv_sinal.assimetria<-ifelse(Y.indiv_assimetria >0.15," para a direita.",
                  ifelse(Y.indiv_assimetria <(-0.15)," para a esquerda.",""))

Y.indiv_conclusao.assimetria.2<-paste(Y.indiv_magnitude.assimetria,
                                        Y.indiv_sinal.assimetria)

Y.indiv_conclusao.assimetria<-paste(Y.indiv_conclusao.assimetria.1,
                              Y.indiv_conclusao.assimetria.2)

Y.indiv_analise.cor.assimetria<-ifelse(Y.indiv_magnitude.assimetria==
                                 "Essa distribuição é muito distorcida",
                                  "nao ok","ok")

################################################################################
### 3.2.9 Medidas tendencia central - resultados individuais - Variavel Desfecho 
################################################################################

Y.indiv_moda.texto<-paste("Moda: ",Y.indiv_moda)
Y.indiv_media.texto<-paste("Média: ",Y.indiv_media)
Y.indiv_mediana.texto<-paste("Mediana: ",Y.indiv_mediana)

Y.indiv_g1= qplot(x="",y=base_de_dados_indiv$eixo_y,
                    data=data.frame(base_de_dados_indiv$eixo_y),xlab="",
                    geom="boxplot") +
                    labs(y=rotulo_eixo_y) + coord_flip() +
                    theme(legend.position="top",
                    legend.text  = element_text (color="black",size =11 ),
                    axis.title = element_text(size = 12),
                    axis.text  = element_text (color="black",size = 11 ))

Y.indiv_g2= qplot(x=base_de_dados_indiv$eixo_y,
  data=data.frame(base_de_dados_indiv$eixo_y),geom="density") + 
  labs(title=paste("Fig. 6. Density plot e Box-Plot (n = ",
  n_y," )."),x=rotulo_eixo_y,y="Densidade") +
  geom_density(color="sienna3",alpha = 0.6,fill="sienna1") +
  stat_function(fun = function(x) dnorm(x, mean = Y.indiv_media,
  sd = Y.indiv_dp),color = "black", size = 1.2) +
  geom_vline(aes(xintercept=Y.indiv_moda,colour=Y.indiv_moda.texto),
  linewidth =1.2,linetype="solid") +
  geom_vline(aes(xintercept = Y.indiv_mediana,colour=Y.indiv_mediana.texto),
  linewidth =1.2,linetype="solid") +
  geom_vline(aes(xintercept = Y.indiv_media,colour=Y.indiv_media.texto),
  linewidth =1.2,linetype="solid") +
  scale_colour_manual("",breaks = c(Y.indiv_moda.texto,
                                    Y.indiv_mediana.texto,
                                    Y.indiv_media.texto),
                      values = c("red","springgreen4","blue2")) +
  theme(legend.position="top",
        axis.title = element_text(size = 12),
        legend.text  = element_text (color="black",size =11 ),
        axis.text  = element_text (color="black",size = 11 ),
        plot.title=element_text(size=10,face = "bold"))

fig.6<-ggarrange(Y.indiv_g2, Y.indiv_g1, heights = c(2, 1), align = "v", 
                 ncol = 1, nrow = 2)
fig.6

ggsave("4_Figuras/Fig.6_Densidade_Box_Plot_Variável_Desfecho_individuais.png",
       plot = fig.6, device = "png")
## Saving 7 x 5 in image
################################################################################
### 3.2.10 Tabela shape - resultados individuais - Variavel Desfecho ___________
################################################################################

Y.indiv_lista.resultados.shape<-c(Y.indiv_Kurtosis,
                                  Y.indiv_conclusao.Kurtosis,
                                  Y.indiv_assimetria,
                                  Y.indiv_conclusao.assimetria)

Tabela9<-data.frame(lista.nomes.shape,Y.indiv_lista.resultados.shape)

kable(Tabela9,
      col.names = c("Parâmetros estatísticos","Resultados"),
      align = "cc",
      caption = paste("Tabela 9. Distribuição de resultados individuais de",rotulo_eixo_y1,
                      "(n =",n_y,").")) %>%
  kable_styling(full_width = FALSE,
      bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  row_spec(2,bold = TRUE,color = "black",
  background = ifelse(is.na(Y.indiv_analise.cor.Kurtosis)==TRUE,"gold",
  ifelse(Y.indiv_analise.cor.Kurtosis=="ok","palegreen","gold"))) %>%
  row_spec(4,bold = TRUE,color = "black",
  background = ifelse(is.na(Y.indiv_analise.cor.assimetria)==TRUE,"gold",
  ifelse(Y.indiv_analise.cor.assimetria=="ok","palegreen","gold"))) %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%
  footnote(general_title = "",general = "") %>%
  footnote(general_title = "",general = "Nota:") %>% 
  footnote(general_title = "",
  general = "Se a curtose estiver entre 2,7 e 3,3, a distribuição pode ser considerada mesocúrtica.") %>%  
  footnote(general_title = "",
           general = "Se a curtose for menor que 2,7, a distribuição é Platicúrtica.") %>%
  footnote(general_title = "",
           general = "Se a curtose for maior que 3,3, a distribuição é Leptocúrtica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria for menor que -1 ou maior que 1, a distribuição é altamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -1 e -0,5 ou entre 0,5 e 1, a distribuição é moderadamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -0,5 e -0,15 ou entre 0,15 e 0,5, a distribuição é moderadamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -0,15 e 0,15, a distribuição é aproximadamente simétrica.")
Tabela 9. Distribuição de resultados individuais de HOMA-IR (n = 65795 ).
Parâmetros estatísticos Resultados
Coeficiente de curtose 510.22
Interpretação do coeficiente de curtose A distribuição pode ser considerada leptocúrtica.
Coeficiente de assimetria 13.64
Interpretando o resultado do coeficiente de assimetria Essa distribuição é muito assimétrica para a direita.
Nota:
Se a curtose estiver entre 2,7 e 3,3, a distribuição pode ser considerada mesocúrtica.
Se a curtose for menor que 2,7, a distribuição é Platicúrtica.
Se a curtose for maior que 3,3, a distribuição é Leptocúrtica.
Se a assimetria for menor que -1 ou maior que 1, a distribuição é altamente assimétrica.
Se a assimetria estiver entre -1 e -0,5 ou entre 0,5 e 1, a distribuição é moderadamente assimétrica.
Se a assimetria estiver entre -0,5 e -0,15 ou entre 0,15 e 0,5, a distribuição é moderadamente assimétrica.
Se a assimetria estiver entre -0,15 e 0,15, a distribuição é aproximadamente simétrica.
################################################################################
## 3.2 Pressupostos dos resultados médios ######################################
################################################################################

################################################################################
### 3.2.1 Teste de homogeneidade das variâncias - resultados médios ____________
################################################################################

base_de_dados$Grupo <- as.factor(base_de_dados$Grupo)
levene_result_medios <-leveneTest(eixo_y ~ Grupo, data = base_de_dados)

levene_results_medios_interpretacao<-ifelse(levene_result_medios[1, 3] > alpha,
                    paste("Os grupos apresentam variâncias homogêneas (p =", 
                    round(levene_result_medios[1, 3], 4), "> 0.005)."),
                    paste("Os grupos não apresentam variâncias homogêneas (p =", 
                    round(levene_result_medios[1, 3], 4), "<= 0.005)."))

################################################################################
### 3.2.2 Tabela Teste de homogeneidade das variâncias - resultados médios _____
################################################################################

nome_coluna_tabela10<-c("GL","Valor F", "p-valor")
Tabela10<-data.frame(levene_result_medios)
Titulo_Tabela10<-"Tabela 10. Resultado do Teste de Homogeneidade das Variâncias (Levene) - resultados médios:"

kable(Tabela10,col.names = nome_coluna_tabela10,
  align = "cc",caption = Titulo_Tabela10) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%  
  footnote(general_title = "Interpretação",
           general = levene_results_medios_interpretacao)
Tabela 10. Resultado do Teste de Homogeneidade das Variâncias (Levene) - resultados médios:
GL Valor F p-valor
group 3 18.35659 0
102 NA NA
Interpretação
Os grupos não apresentam variâncias homogêneas (p = 0 <= 0.005).
################################################################################
### 3.2.3 Analise Curtose - resultados médios - Prolactina _____________________
################################################################################

curtose<-function(dataset) {
  require(moments)
  round(kurtosis(dataset),2)
}

PRL.medio_Kurtosis<-curtose(base_de_dados$eixo_x)

PRL.medio_conclusao.Kurtosis<-ifelse(PRL.medio_Kurtosis>3.3,Leptokurtic,
                             ifelse(PRL.medio_Kurtosis<2.7,
                                    Platykurtic,Mesokurtic))

PRL.medio_analise.cor.Kurtosis<-ifelse(PRL.medio_conclusao.Kurtosis==Mesokurtic,
                                "ok","nao ok")

################################################################################
### 3.2.4 Analise Assimetria - resultados médios - Prolactina __________________
################################################################################

PRL.medio_dp<-round(sd(base_de_dados$eixo_x),digits=3)
PRL.medio_media<-round(mean(base_de_dados$eixo_x),digits=3)
PRL.medio_mediana<-round(median(base_de_dados$eixo_x),digits=3)
PRL.medio_moda<-locmodes(base_de_dados$eixo_x,mod0=1)
## Warning in locmodes(base_de_dados$eixo_x, mod0 = 1): If the density function
## has an unbounded support, artificial modes may have been created in the tails
PRL.medio_moda<-round(PRL.medio_moda$locations,digits=3)

PRL.medio_neg.assimetria<-ifelse(PRL.medio_moda>PRL.medio_mediana & 
                                 PRL.medio_moda>PRL.medio_media & 
                                 PRL.medio_mediana>PRL.medio_media,"ok",
                                 "nao ok")

PRL.medio_pos.assimetria<-ifelse(PRL.medio_media>PRL.medio_mediana & 
                                 PRL.medio_media>PRL.medio_moda & 
                                 PRL.medio_mediana>PRL.medio_moda,"ok",
                                 "nao ok")

PRL.medio_conclusao.assimetria.1<-ifelse(PRL.medio_pos.assimetria=="ok" & 
 PRL.medio_neg.assimetria=="nao ok",
 "A relação empírica entre Média, Mediana e Moda é: Média > Mediana > Moda. ",
 ifelse(PRL.medio_pos.assimetria=="nao ok" & PRL.medio_neg.assimetria=="ok",
 "Relação empírica entre Média, Mediana e Moda: Moda > Mediana > Média.",""))

PRL.medio_assimetria<-signif(round(skewness(base_de_dados$eixo_x),digits=2),4)

PRL.medio_magnitude.assimetria<-ifelse(abs(PRL.medio_assimetria)>1,
                              "Essa distribuição é muito assimétrica",
                              ifelse(abs(PRL.medio_assimetria)>0.5 & 
                              abs(PRL.medio_assimetria)<=1,
                              "Esta distribuição é assimétrica",
                              ifelse(abs(PRL.medio_assimetria)>0.15 & 
                              abs(PRL.medio_assimetria)<=0.5,
                              "A distribuição é moderadamente assimétrica",
                              "A distribuição é aproximadamente simétrica.")))

PRL.medio_sinal.assimetria<-ifelse(PRL.medio_assimetria >0.15," para a direita.",
                  ifelse(PRL.medio_assimetria <(-0.15)," para a esquerda.",""))

PRL.medio_conclusao.assimetria.2<-paste(PRL.medio_magnitude.assimetria,
                                PRL.medio_sinal.assimetria)

PRL.medio_conclusao.assimetria<-paste(PRL.medio_conclusao.assimetria.1,
                              PRL.medio_conclusao.assimetria.2)

PRL.medio_analise.cor.assimetria<-ifelse(PRL.medio_magnitude.assimetria==
                          "Essa distribuição é muito distorcida",
                          "nao ok","ok")

################################################################################
### 3.2.5 Medidas tendencia central - resultados médios - Prolactina ___________
################################################################################

PRL.medio_moda.texto<-paste("Moda: ",PRL.medio_moda)
PRL.medio_media.texto<-paste("Média: ",PRL.medio_media)
PRL.medio_mediana.texto<-paste("Mediana: ",PRL.medio_mediana)

PRL.medio_g1= qplot(x="",y=base_de_dados$eixo_x,
                    data=data.frame(base_de_dados$eixo_x),xlab="",
                    geom="boxplot") +
                    labs(y=rotulo_eixo_x_médio) + coord_flip() +
                    theme(legend.position="top",
                    legend.text  = element_text (color="black",size =11 ),
                    axis.title = element_text(size = 12),
                    axis.text  = element_text (color="black",size = 11 ))

PRL.medio_g2= qplot(x=base_de_dados$eixo_x,
  data=data.frame(base_de_dados$eixo_x),geom="density") + 
  labs(title=paste("Fig. 7. Density plot e Box-Plot (n = ",
  n_y_medios," )."),x=rotulo_eixo_x_médio,y="Densidade") +
  geom_density(color="sienna3",alpha = 0.6,fill="sienna1") +
  stat_function(fun = function(x) dnorm(x, mean = PRL.medio_media,
  sd = PRL.medio_dp),color = "black", size = 1.2) +
  geom_vline(aes(xintercept=PRL.medio_moda,colour=PRL.medio_moda.texto),
  linewidth =1.2,linetype="solid") +
  geom_vline(aes(xintercept = PRL.medio_mediana,colour=PRL.medio_mediana.texto),
  linewidth =1.2,linetype="solid") +
  geom_vline(aes(xintercept = PRL.medio_media,colour=PRL.medio_media.texto),
  linewidth =1.2,linetype="solid") +
  scale_colour_manual("",breaks = c(PRL.medio_moda.texto,
                                    PRL.medio_mediana.texto,
                                    PRL.medio_media.texto),
                      values = c("red","springgreen4","blue2")) +
  theme(legend.position="top",
        axis.title = element_text(size = 12),
        legend.text  = element_text (color="black",size =11 ),
        axis.text  = element_text (color="black",size = 11 ),
        plot.title=element_text(size=10,face = "bold"))

fig.7<-ggarrange(PRL.medio_g2, PRL.medio_g1, heights = c(2, 1), align = "v", 
                 ncol = 1, nrow = 2)
fig.7

ggsave("4_Figuras/Fig.7_Densidade_Box_Plot_PRL_médios.png",
        plot = fig.7, device = "png")
## Saving 7 x 5 in image
################################################################################
### 3.2.6 Tabela shape - resultados médios - Prolactina ________________________
################################################################################

PRL.medio_lista.resultados.shape<-c(PRL.medio_Kurtosis,
                            PRL.medio_conclusao.Kurtosis,
                            PRL.medio_assimetria,
                            PRL.medio_conclusao.assimetria)

Tabela11<-data.frame(lista.nomes.shape,PRL.medio_lista.resultados.shape)

kable(Tabela11,
      col.names = c("Parâmetros estatísticos","Resultados"),
      align = "cc",
      caption = paste("Tabela 11. Distribuição dos resultados médios de",
                      rotulo_eixo_x1," (n =",n_y_medios,").")) %>%
  kable_styling(full_width = FALSE,
      bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  row_spec(2,bold = TRUE,color = "black",
  background = ifelse(is.na(PRL.medio_analise.cor.Kurtosis)==TRUE,"gold",
  ifelse(PRL.medio_analise.cor.Kurtosis=="ok","palegreen","gold"))) %>%
  row_spec(4,bold = TRUE,color = "black",
  background = ifelse(is.na(PRL.medio_analise.cor.assimetria)==TRUE,"gold",
  ifelse(PRL.medio_analise.cor.assimetria=="ok","palegreen","gold"))) %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%
  footnote(general_title = "",general = "") %>%
  footnote(general_title = "",general = "Nota:") %>% 
  footnote(general_title = "",
  general = "Se a curtose estiver entre 2,7 e 3,3, a distribuição pode ser considerada mesocúrtica.") %>%  
  footnote(general_title = "",
           general = "Se a curtose for menor que 2,7, a distribuição é Platicúrtica.") %>%
  footnote(general_title = "",
           general = "Se a curtose for maior que 3,3, a distribuição é Leptocúrtica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria for menor que -1 ou maior que 1, a distribuição é altamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -1 e -0,5 ou entre 0,5 e 1, a distribuição é moderadamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -0,5 e -0,15 ou entre 0,15 e 0,5, a distribuição é moderadamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -0,15 e 0,15, a distribuição é aproximadamente simétrica.")
Tabela 11. Distribuição dos resultados médios de Prolactina (n = 106 ).
Parâmetros estatísticos Resultados
Coeficiente de curtose 16.27
Interpretação do coeficiente de curtose A distribuição pode ser considerada leptocúrtica.
Coeficiente de assimetria 3.47
Interpretando o resultado do coeficiente de assimetria Essa distribuição é muito assimétrica para a direita.
Nota:
Se a curtose estiver entre 2,7 e 3,3, a distribuição pode ser considerada mesocúrtica.
Se a curtose for menor que 2,7, a distribuição é Platicúrtica.
Se a curtose for maior que 3,3, a distribuição é Leptocúrtica.
Se a assimetria for menor que -1 ou maior que 1, a distribuição é altamente assimétrica.
Se a assimetria estiver entre -1 e -0,5 ou entre 0,5 e 1, a distribuição é moderadamente assimétrica.
Se a assimetria estiver entre -0,5 e -0,15 ou entre 0,15 e 0,5, a distribuição é moderadamente assimétrica.
Se a assimetria estiver entre -0,15 e 0,15, a distribuição é aproximadamente simétrica.
################################################################################
### 3.2.7 Analise Curtose - resultados médios - Variavel Desfecho ______________
################################################################################

curtose<-function(dataset) {
  require(moments)
  round(kurtosis(dataset),2)
}


Y.medio_Kurtosis<-curtose(base_de_dados$eixo_y)

Y.medio_conclusao.Kurtosis<-ifelse(Y.medio_Kurtosis>3.3,Leptokurtic,
                            ifelse(Y.medio_Kurtosis<2.7,
                            Platykurtic,Mesokurtic))

Y.medio_analise.cor.Kurtosis<-ifelse(Y.medio_conclusao.Kurtosis==Mesokurtic,
                                "ok","nao ok")

################################################################################
### 3.2.8 Analise Assimetria - resultados médios - Variavel Desfecho ___________
################################################################################

Y.medio_dp<-round(sd(base_de_dados$eixo_y),digits=3)
Y.medio_media<-round(mean(base_de_dados$eixo_y),digits=3)
Y.medio_mediana<-round(median(base_de_dados$eixo_y),digits=3)
Y.medio_moda<-locmodes(base_de_dados$eixo_y,mod0=1)
## Warning in locmodes(base_de_dados$eixo_y, mod0 = 1): If the density function
## has an unbounded support, artificial modes may have been created in the tails
Y.medio_moda<-round(Y.medio_moda$locations,digits=3)

Y.medio_neg.assimetria<-ifelse(Y.medio_moda>Y.medio_mediana & 
                                 Y.medio_moda>Y.medio_media & 
                                 Y.medio_mediana>Y.medio_media,"ok",
                                 "nao ok")

Y.medio_pos.assimetria<-ifelse(Y.medio_media>Y.medio_mediana & 
                                 Y.medio_media>Y.medio_moda & 
                                 Y.medio_mediana>Y.medio_moda,"ok","nao ok")

Y.medio_conclusao.assimetria.1<-ifelse(Y.medio_pos.assimetria=="ok" & 
 Y.medio_neg.assimetria=="nao ok",
 "A relação empírica entre Média, Mediana e Moda é: Média > Mediana > Moda. ",
 ifelse(Y.medio_pos.assimetria=="nao ok" & Y.medio_neg.assimetria=="ok",
 "Relação empírica entre Média, Mediana e Moda: Moda > Mediana > Média.",""))

Y.medio_assimetria<-signif(round(skewness(base_de_dados$eixo_y),digits=2),4)

Y.medio_magnitude.assimetria<-ifelse(abs(Y.medio_assimetria)>1,
                              "Essa distribuição é muito assimétrica",
                              ifelse(abs(Y.medio_assimetria)>0.5 & 
                              abs(Y.medio_assimetria)<=1,
                              "Esta distribuição é assimétrica",
                              ifelse(abs(Y.medio_assimetria)>0.15 & 
                              abs(Y.medio_assimetria)<=0.5,
                              "A distribuição é moderadamente assimétrica",
                              "A distribuição é aproximadamente simétrica.")))

Y.medio_sinal.assimetria<-ifelse(Y.medio_assimetria >0.15," para a direita.",
                  ifelse(Y.medio_assimetria <(-0.15)," para a esquerda.",""))

Y.medio_conclusao.assimetria.2<-paste(Y.medio_magnitude.assimetria,
                                        Y.medio_sinal.assimetria)

Y.medio_conclusao.assimetria<-paste(Y.medio_conclusao.assimetria.1,
                              Y.medio_conclusao.assimetria.2)

Y.medio_analise.cor.assimetria<-ifelse(Y.medio_magnitude.assimetria==
                                 "Essa distribuição é muito distorcida",
                                  "nao ok","ok")

################################################################################
### 3.2.9 Medidas tendencia central - resultados médios - Variavel Desfecho ____
################################################################################

Y.medio_moda.texto<-paste("Moda: ",Y.medio_moda)
Y.medio_media.texto<-paste("Média: ",Y.medio_media)
Y.medio_mediana.texto<-paste("Mediana: ",Y.medio_mediana)

Y.medio_g1= qplot(x="",y=base_de_dados$eixo_y,
                    data=data.frame(base_de_dados$eixo_y),xlab="",
                    geom="boxplot") +
                    labs(y=rotulo_eixo_y_médio2) + coord_flip() +
                    theme(legend.position="top",
                    legend.text  = element_text (color="black",size =11 ),
                    axis.title = element_text(size = 12),
                    axis.text  = element_text (color="black",size = 11 ))

Y.medio_g2= qplot(x=base_de_dados$eixo_y,
  data=data.frame(base_de_dados$eixo_y),geom="density") + 
  labs(title=paste("Fig. 6. Density plot e Box-Plot (n = ",
  n_y_medios," )."),x=rotulo_eixo_y_médio,y="Densidade") +
  geom_density(color="sienna3",alpha = 0.6,fill="sienna1") +
  stat_function(fun = function(x) dnorm(x, mean = Y.medio_media,
  sd = Y.medio_dp),color = "black", size = 1.2) +
  geom_vline(aes(xintercept=Y.medio_moda,colour=Y.medio_moda.texto),
  linewidth =1.2,linetype="solid") +
  geom_vline(aes(xintercept = Y.medio_mediana,colour=Y.medio_mediana.texto),
  linewidth =1.2,linetype="solid") +
  geom_vline(aes(xintercept = Y.medio_media,colour=Y.medio_media.texto),
  linewidth =1.2,linetype="solid") +
  scale_colour_manual("",breaks = c(Y.medio_moda.texto,
                                    Y.medio_mediana.texto,
                                    Y.medio_media.texto),
                      values = c("red","springgreen4","blue2")) +
  theme(legend.position="top",
        axis.title = element_text(size = 12),
        legend.text  = element_text (color="black",size =11 ),
        axis.text  = element_text (color="black",size = 11 ),
        plot.title=element_text(size=10,face = "bold"))

fig.8<-ggarrange(Y.medio_g2, Y.medio_g1, heights = c(2, 1), align = "v", 
                 ncol = 1, nrow = 2)
fig.8

ggsave("4_Figuras/Fig.8_Densidade_Box_Plot_Variavel_Desfecho_médios.png",
        plot = fig.8, device = "png")
## Saving 7 x 5 in image
################################################################################
### 3.2.10 Tabela shape - resultados médios - Variavel Desfecho ________________
################################################################################

Y.medio_lista.resultados.shape<-c(Y.medio_Kurtosis,
                                    Y.medio_conclusao.Kurtosis,
                                    Y.medio_assimetria,
                                    Y.medio_conclusao.assimetria)

Tabela12<-data.frame(lista.nomes.shape,Y.medio_lista.resultados.shape)

kable(Tabela12,
      col.names = c("Parâmetros estatísticos","Resultados"),
      align = "cc",
      caption = paste("Tabela 12. Distribuição de resultados médios de",rotulo_eixo_y1,
                      "(n =",n_y_medios,").")) %>%
  kable_styling(full_width = FALSE,
      bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  row_spec(2,bold = TRUE,color = "black",
  background = ifelse(is.na(Y.medio_analise.cor.Kurtosis)==TRUE,"gold",
  ifelse(Y.medio_analise.cor.Kurtosis=="ok","palegreen","gold"))) %>%
  row_spec(4,bold = TRUE,color = "black",
  background = ifelse(is.na(Y.medio_analise.cor.assimetria)==TRUE,"gold",
  ifelse(Y.medio_analise.cor.assimetria=="ok","palegreen","gold"))) %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%
  footnote(general_title = "",general = "") %>%
  footnote(general_title = "",general = "Nota:") %>% 
  footnote(general_title = "",
  general = "Se a curtose estiver entre 2,7 e 3,3, a distribuição pode ser considerada mesocúrtica.") %>%  
  footnote(general_title = "",
           general = "Se a curtose for menor que 2,7, a distribuição é Platicúrtica.") %>%
  footnote(general_title = "",
           general = "Se a curtose for maior que 3,3, a distribuição é Leptocúrtica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria for menor que -1 ou maior que 1, a distribuição é altamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -1 e -0,5 ou entre 0,5 e 1, a distribuição é moderadamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -0,5 e -0,15 ou entre 0,15 e 0,5, a distribuição é moderadamente assimétrica.") %>%
  footnote(general_title = "",
           general = "Se a assimetria estiver entre -0,15 e 0,15, a distribuição é aproximadamente simétrica.")
Tabela 12. Distribuição de resultados médios de HOMA-IR (n = 106 ).
Parâmetros estatísticos Resultados
Coeficiente de curtose 3.89
Interpretação do coeficiente de curtose A distribuição pode ser considerada leptocúrtica.
Coeficiente de assimetria 1.02
Interpretando o resultado do coeficiente de assimetria A relação empírica entre Média, Mediana e Moda é: Média > Mediana > Moda. Essa distribuição é muito assimétrica para a direita.
Nota:
Se a curtose estiver entre 2,7 e 3,3, a distribuição pode ser considerada mesocúrtica.
Se a curtose for menor que 2,7, a distribuição é Platicúrtica.
Se a curtose for maior que 3,3, a distribuição é Leptocúrtica.
Se a assimetria for menor que -1 ou maior que 1, a distribuição é altamente assimétrica.
Se a assimetria estiver entre -1 e -0,5 ou entre 0,5 e 1, a distribuição é moderadamente assimétrica.
Se a assimetria estiver entre -0,5 e -0,15 ou entre 0,15 e 0,5, a distribuição é moderadamente assimétrica.
Se a assimetria estiver entre -0,15 e 0,15, a distribuição é aproximadamente simétrica.

4. Critérios clínicos baseado na VB e estado da arte


################################################################################
################################################################################
########### (4) Critérios clínicos baseado na VB e estado da arte ##############
################################################################################
################################################################################

library(utf8)
set.seed(210002859)

################################################################################
## 4.1 Critério clinicamente significativo - variação biológica ################
################################################################################

Limite_de_decisao_do_teste<-as.numeric(Limite_de_decisao_do_teste)

CVI<-as.numeric(Coeficiente_de_variação_biológico_individual________CVI)
CVG<-as.numeric(Coeficiente_de_variação_biológico_grupo_____________CVG)

CVI2<-Coeficiente_de_variação_biológico_individual________CVI
CVG2<-Coeficiente_de_variação_biológico_grupo_____________CVG

otimo<-especificações_de_desempenho_otimo
desejavel<-especificações_de_desempenho_desejavel
minimo<-especificações_de_desempenho_minimo

criterio_VB<-ifelse(otimo=="x"||otimo=="X","ótimo",
             ifelse(desejavel=="x"||desejavel=="X","desejável",
             ifelse(minimo=="x"||minimo=="X","mínimo","NA")))

Bias_otimo<-round(0.125*(CVI^2 + CVG^2)^0.5,3)
Bias_desejavel<-round(0.25*(CVI^2 + CVG^2)^0.5,3)
Bias_minimo<-round(0.375*(CVI^2 + CVG^2)^0.5,3)  

bias_permitido_VB<-ifelse(otimo=="x"||otimo=="X",Bias_otimo,
                   ifelse(desejavel=="x"||desejavel=="X",Bias_desejavel,
                   ifelse(minimo=="x"||minimo=="X",Bias_minimo,Bias_desejavel)))

bias_permitido_VB<-bias_permitido_VB/100

bias_permitido_VB2<-bias_permitido_VB*Limite_de_decisao_do_teste

################################################################################
## 4.2 Tabela critério clinicamente significativo - variação biológica #########
################################################################################

nome_coluna_tabela13<-c("Estatística","Resultados")

Tabela13<-data.frame(Estatistica=c("Coeficiente Variação Individual (CVi)",
         "Coeficiente Variação Grupo (CVg)",
         "Critério da Variação biológica",
         "Bias percentual permitido",
         "Limite de decisão médica",
         "Bias absoluto permitido"),
          Resultados=c(round(CVI,5),
                      round(CVG,5),
                      criterio_VB,
                      round(bias_permitido_VB*100,5),
                      round(Limite_de_decisao_do_teste,5),
                      round(bias_permitido_VB2,3)))

Titulo_Tabela13<-"Tabela 13. Bias permitido baseado  nos componentes da variação biológica - Modelo 2 da Conferência de Milão."

kable(Tabela13,col.names = nome_coluna_tabela13,
  align = "cc",caption = Titulo_Tabela13) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 13. Bias permitido baseado nos componentes da variação biológica - Modelo 2 da Conferência de Milão.
Estatística Resultados
Coeficiente Variação Individual (CVi) NA
Coeficiente Variação Grupo (CVg) NA
Critério da Variação biológica NA
Bias percentual permitido NA
Limite de decisão médica 2.7
Bias absoluto permitido NA
################################################################################
## 4.3 Critério clinicamente significativo - estado da arte ####################
################################################################################

refineR<-getRI(findRI(Data = base_de_dados_indiv$eixo_y),
               RIperc=c(0.025,0.975))

LI.IR.refineR<-round(refineR$PointEst[1],digits=3)

LS.IR.refineR<-round(refineR$PointEst[2],digits=3)

Intervalo.Referencia.RefineR<-paste("Intervalo Referencia 95%: ",
                              LI.IR.refineR," a ",LS.IR.refineR)

ir_metodo.refineR<-paste(LI.IR.refineR,LS.IR.refineR,sep=" a ")

main_metodo.refineR<-paste("IR refineR =",
                           ir_metodo.refineR,"(n =", n_y,")")

plot(findRI(Data = base_de_dados_indiv$eixo_y),Scale = "original",
                   RIperc = c(0.025,0.975),Nhist = 50,showCI = TRUE,
                   showPathol = TRUE,showValue = TRUE,ylim = NULL,
                   xlab = rotulo_eixo_y,ylab = "Frequencia Absoluta",
                   title = NA,cex.main = 0.5)

CV_e<-sqrt(exp((((ln(LS.IR.refineR)-ln(LI.IR.refineR))/3.92))^2)-1)

pCVa<-(((CV_e*100-0.25)^0.5)/100)

Med<-(LS.IR.refineR*LI.IR.refineR)^0.5

pSA_Med<-round(Med*pCVa,8)

Slope<-round((pSA_Med-0.2*pSA_Med)/Med,8)

pSA_xi<-round(((Limite_de_decisao_do_teste*Slope)+(0.2*pSA_Med)),8)

pCVA_xi<-round(pSA_xi/Limite_de_decisao_do_teste,8)

pB_xi<-round(pSA_xi*0.7,8)

pB_perc_xi<-round(pCVA_xi*0.7,8)

bias_perc_permitido_Estado_Arte<-round(pB_perc_xi*Limite_de_decisao_do_teste,3)

bias_abs_permitido_Estado_Arte<-round(pB_xi,3)

bias_permitido_Estado_Arte<-ifelse(Limite_de_decisao_do_teste=="",
                                   bias_abs_permitido_Estado_Arte,
                                   bias_perc_permitido_Estado_Arte)


################################################################################
## 4.4 Tabela critério clinicamente significativo - estado da arte #############
################################################################################

nome_coluna_tabela14<-c("Estatística","Resultados")

Tabela14<-data.frame(Estatistica=c("Limite Superior do IR",
         "Limite Inferior do IR",
         "CV empírico (CVe)",
         "CV analítico permitido (pCVa)",
         "Slope",
         "Mediana do Intervalo de Referência (Med)",
         "Desvio padrão analítico permitido para um valor na mediana (pSA_Med)",
         "Limite de decisão médica (xi)",
         "pSA_xi",
         "Bias % permitido no nível de decisão médica (pB_xi)",
         "Bias permitido no nível de decisão médica (pB_xi)"),
         Resultados=c(round(LS.IR.refineR,5),
                      round(LI.IR.refineR,5),
                      round(CV_e,5),
                      round(pCVa,5),
                      round(Slope,5),
                      round(Med,5),
                      round(pSA_Med,5),
                      round(Limite_de_decisao_do_teste,5),
                      round(pSA_xi,5),
                      round(pB_perc_xi*100,5),
                      round(bias_permitido_Estado_Arte,5)))


Titulo_Tabela14<-"Tabela 14. Bias permitido baseado  no estado da arte - Modelo 3 da Conferência de Milão."

kable(Tabela14,col.names = nome_coluna_tabela14,
  align = "cc",caption = Titulo_Tabela14) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 14. Bias permitido baseado no estado da arte - Modelo 3 da Conferência de Milão.
Estatística Resultados
Limite Superior do IR 5.38800
Limite Inferior do IR 0.51700
CV empírico (CVe) 0.65558
CV analítico permitido (pCVa) 0.08081
Slope 0.06465
Mediana do Intervalo de Referência (Med) 1.66901
Desvio padrão analítico permitido para um valor na mediana (pSA_Med) 0.13488
Limite de decisão médica (xi) 2.70000
pSA_xi 0.20153
Bias % permitido no nível de decisão médica (pB_xi) 5.22490
Bias permitido no nível de decisão médica (pB_xi) 0.14100
################################################################################
## 4.5 Critério clinicamente significativo - estado arte e VB ##################
################################################################################

bias_permitido<-ifelse(CVI2==""||CVG2=="",bias_permitido_Estado_Arte,
                                          bias_permitido_VB2)

Criterio_selecionado<-ifelse(CVI2==""||CVG2=="",
"Modelo 3 da Conferência de Milão - Bias permitido baseado no Estado Arte",
"Modelo 2 da Conferência de Milão - Bias permitido nos componentes da variação biológica")

Tabela15<-data.frame(Estatistica=bias_permitido)

nome_coluna_tabela15<-"Bias Permitido Selecionado"

Titulo_Tabela15<-"Tabela 15. Critério clínico selecionado."

kable(Tabela15,col.names = nome_coluna_tabela15,
   align = "cc",caption = Titulo_Tabela15) %>%
   kable_styling(full_width = FALSE,
   bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  footnote(general_title = "Fonte:",general = Criterio_selecionado)
Tabela 15. Critério clínico selecionado.
Bias Permitido Selecionado
0.141
Fonte:
Modelo 3 da Conferência de Milão - Bias permitido baseado no Estado Arte


5. Comparação entre faixas (Resultados individuais)


################################################################################
################################################################################
########### (5) Comparação entre faixas (Resultados individuais) ###############
################################################################################
################################################################################

library(utf8)
set.seed(210002859)

################################################################################
## 5.1 Gráfico Barra de Erro ###################################################
################################################################################

base_de_dados_vio<- base_de_dados_indiv

base_de_dados_vio$Grupo <- factor(base_de_dados_vio$Grupo, 
                                  levels = c("PRL<7", 
                                             "7<=PRL<25", 
                                             "25<=PRL<100",
                                             "PRL>100"))

graf_viol<- ggplot(base_de_dados_vio, aes(x=Grupo, y=eixo_y)) +
  geom_violin(aes(fill=Grupo), trim=FALSE) +
  stat_summary(fun = median, geom = "crossbar", width = 0.5, color = "black") +
  labs(x=rotulo_eixo_x, y=rotulo_eixo_y) +
  scale_fill_discrete(name="Grupos") +
  scale_y_continuous(n.breaks = 8) +
  theme(axis.line = element_line(colour = "black", linewidth = 0.5),
  axis.title = element_text(size = 14),
  legend.text  = element_text(colour = "black", size = 14),
  axis.text  = element_text(colour = "black", size = 14),
  plot.title = element_text(size = 14, face = "bold"),
  plot.background = element_blank(),
  panel.background = element_blank())+
  guides(fill=FALSE)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
graf_viol

ggsave("4_Figuras/Fig.9_Graf_Violino_Comp_Faixas_PRL_resultados_indiv.png",
       plot = graf_viol, device = "png")
## Saving 7 x 5 in image
################################################################################
## 5.2 Tabela com as médias por faixa ##########################################
################################################################################

Tabela16 <- base_de_dados_vio %>%
  group_by(Grupo) %>%
  summarize(Tamanho_Amostral = n(),
            Percentil_25_Desf = quantile(eixo_y, 0.25, na.rm = TRUE),
            Mediana_Desf = median(eixo_y, na.rm = TRUE),
            Percentil_75_Desf = quantile(eixo_y, 0.75, na.rm = TRUE),
            Percentil_25_PRL = quantile(eixo_x, 0.25, na.rm = TRUE),
            Mediana_PRL = median(eixo_x, na.rm = TRUE),
            Percentil_75_PRL = quantile(eixo_x, 0.75, na.rm = TRUE))

colnames(Tabela16)<-c("Faixa de resultados PRL","n",
                      paste("Percentil 25",rotulo_eixo_y1),
                      paste("Mediana",rotulo_eixo_y1),
                      paste("Percentil 75",rotulo_eixo_y1),
                      paste("Percentil 25",rotulo_eixo_x1),
                      paste("Mediana",rotulo_eixo_x1),
                      paste("Percentil 75",rotulo_eixo_x1))

Tabela16_Titulo<-"Tabela 16. Resultados individuais por faixa de concentração - resultados individuais."

kable(Tabela16,align = "ccc",caption = Tabela16_Titulo) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White", background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 16. Resultados individuais por faixa de concentração - resultados individuais.
Faixa de resultados PRL n Percentil 25 HOMA-IR Mediana HOMA-IR Percentil 75 HOMA-IR Percentil 25 Prolactina Mediana Prolactina Percentil 75 Prolactina
PRL<7 18939 1.3 2.1 3.3 4.6 5.5 6.3
7<=PRL<25 42644 1.3 1.9 2.9 8.5 10.6 14.2
25<=PRL<100 4035 1.3 1.9 2.7 27.8 32.1 40.6
PRL>100 177 1.1 1.7 2.9 118.2 141.7 175.6
################################################################################
## 5.3 Tetes kruskal wallis ####################################################
################################################################################

base_de_dados_indiv_kw<-data.frame(Resultado=base_de_dados_indiv$eixo_y,
                                   Grupo=base_de_dados_indiv$Grupo)
colnames(base_de_dados_indiv_kw)[1]<-c("Resultado","Grupo")
## Warning in colnames(base_de_dados_indiv_kw)[1] <- c("Resultado", "Grupo"):
## número de itens para para substituir não é um múltiplo do comprimento do
## substituto
fit.kw <- kruskal.test(Resultado ~ Grupo, data = base_de_dados_indiv_kw)

chi_squared_kw<-round(as.numeric(fit.kw$statistic),3)
df_kw<-as.numeric(fit.kw$parameter)
p_kw<-round(round(fit.kw$p.value),5)
p_kw<-ifelse(p_kw<0.00005,"< 0.00005",p_kw)

titulo_Tabela17<-"Tabela 17. Teste de Kruskal-Wallis (Critério A - Avaliando a significância estatística) - resultados individuais."

resultados_kw<-c(chi_squared_kw,df_kw,p_kw)

estatisticas_kw <- c("Kruskal-Wallis chi-squared","df","p-value")

Tabela17<-data.frame(Estatisticas=estatisticas_kw,Resultados=resultados_kw)

colnames_Tabela17 <- c("Estatísticas","Resultados")

kable(Tabela17,col.names = colnames_Tabela17,
  align = "ccc",caption = titulo_Tabela17) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 17. Teste de Kruskal-Wallis (Critério A - Avaliando a significância estatística) - resultados individuais.
Estatísticas Resultados
Kruskal-Wallis chi-squared 341.764
df 3
p-value < 0.00005
################################################################################
## 5.4 Tetes posthoc dunn (Critério A - Avaliando significância estatística) ###
################################################################################

teste_posthoc<-dunn_test(Resultado~Grupo,data =base_de_dados_indiv_kw,
                         p.adjust.method = "bonferroni")
estatisticas_kw <- c("Kruskal-Wallis chi-squared","df","p-value")

Grupo_1_dunn<-teste_posthoc$group1
Grupo_2_dunn<-teste_posthoc$group2
n_1_dunn<-teste_posthoc$n1
n_2_dunn<-teste_posthoc$n2
p.adj_dunn<-round(teste_posthoc$p.adj,5)

Tabela18<-data.frame(Grupo_1=Grupo_1_dunn,
                        Grupo_2=Grupo_2_dunn,
                        n1=n_1_dunn,n2=n_2_dunn,
                        p.adj=p.adj_dunn)

Tabela18 <- Tabela18 %>%
  dplyr::mutate(Interpretacao = ifelse(Tabela18$p.adj<0.005,
                                "Diferente","Igual"))

colnames_Tabela18 <- c("Grupo 1","Grupo 2","n1","n2",
                      "p-valor ajustado","Interpretação")

titulo_Tabela18<-"Tabela 18. Teste Post hoc de Dunn (Critério A - Avaliando a significância estatística, alfa = 0.005)  - resultados individuais."

kable(Tabela18,col.names = colnames_Tabela18,
  align = "ccc",caption = titulo_Tabela18) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%
  column_spec(2,bold = TRUE,color = "White",background = "#1976d2") %>%
  column_spec(5:6, color = "black", background=ifelse(Tabela18$p.adj<0.005,
  "lightpink","palegreen"))
Tabela 18. Teste Post hoc de Dunn (Critério A - Avaliando a significância estatística, alfa = 0.005) - resultados individuais.
Grupo 1 Grupo 2 n1 n2 p-valor ajustado Interpretação
25<=PRL<100 7<=PRL<25 4035 42644 0.03997 Igual
25<=PRL<100 PRL<7 4035 18939 0.00000 Diferente
25<=PRL<100 PRL>100 4035 177 1.00000 Igual
7<=PRL<25 PRL<7 42644 18939 0.00000 Diferente
7<=PRL<25 PRL>100 42644 177 0.85941 Igual
PRL<7 PRL>100 18939 177 0.00298 Diferente
################################################################################
## 5.5 Tamanaho efeito (Critério B.1 - Avaliando a relevância prática) #########
################################################################################

TDE_epsilon2<-epsilonSquared(x=base_de_dados_indiv_kw$Resultado,
                         g=base_de_dados_indiv_kw$Grupo,ci=T)
TDE_epsilon2<-as.numeric(TDE_epsilon2)

TDE_epsilon2_valor<-TDE_epsilon2[1]

TDE_epsilon2_LI_IC95<-TDE_epsilon2[2]
TDE_epsilon2_LS_IC95<-TDE_epsilon2[3]

Interpretacao_TDE_epsilon2<-ifelse(TDE_epsilon2_valor<0.01,"irrisório",
                            ifelse(TDE_epsilon2_valor>=0.01 &
                                   TDE_epsilon2_valor<0.08,"Pequeno",
                            ifelse(TDE_epsilon2_valor>=0.08 &
                                   TDE_epsilon2_valor<0.26,"Moderado",
                            ifelse(TDE_epsilon2_valor>=0.26,"Grande","---"))))

Tabela19<-data.frame(Metodo="epsilon2 ordinal",
                     TDE=TDE_epsilon2_valor,
                     LI_TDE=TDE_epsilon2_LI_IC95,
                     LS_TDE=TDE_epsilon2_LS_IC95,
                     Interpretacao=Interpretacao_TDE_epsilon2) 

colnames_Tabela19<-c("Método","TDE",
                     "Limite Inferior IC 95%",
                     "Limite Superior IC 95%",
                     "Interpretação")

titulo_Tabela19<-"Tabela 19. Tamanho de efeito (TDE) do Grupo (Critério B.1 - Avaliando a significância prática)."

kable(Tabela19,col.names = colnames_Tabela19,
  align = "ccc",caption = titulo_Tabela19) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1")
Tabela 19. Tamanho de efeito (TDE) do Grupo (Critério B.1 - Avaliando a significância prática).
Método TDE Limite Inferior IC 95% Limite Superior IC 95% Interpretação
epsilon2 ordinal 0.00519 0.00415 0.00639 irrisório
titulo_Tabela20<-"Tabela 20. Tamanho de efeito Linguagem Comum (TDE-LC) pelo método Vargha-Delaney A (Critério B.1 - Avaliando a significância prática)  - resultados individuais."

multiVDA<-rcompanion::multiVDA(Resultado~Grupo,data = base_de_dados_indiv_kw)

Tabela20<-data.frame(multiVDA$pairs)

Tabela20<-data.frame(Comparacao=Tabela20$Comparison,
                     Resultados=Tabela20$VDA,
                     Resultados_max=Tabela20$VDA.m)

Tabela20 <- data.frame(Tabela20,Interpretacao = "")

Tabela20 <- Tabela20 %>%
  dplyr::mutate(Interpretacao = ifelse(Tabela20$Resultados_max<0.56,"irrisório",
                                ifelse(Tabela20$Resultados_max>=0.56 &
                                Tabela20$Resultados_max<0.64,"Pequeno",
                                ifelse(Tabela20$Resultados_max>=0.64 &
                                Tabela20$Resultados_max<0.71,"Moderado",
                                ifelse(Tabela20$Resultados_max>=0.71,"Grande","---")))))

colnames_Tabela20 <- c("Comparação","TDE-LC","TDE-LC máximo","Interpretação")

kable(Tabela20,col.names = colnames_Tabela20,
  align = "ccc",caption = titulo_Tabela20) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1")
Tabela 20. Tamanho de efeito Linguagem Comum (TDE-LC) pelo método Vargha-Delaney A (Critério B.1 - Avaliando a significância prática) - resultados individuais.
Comparação TDE-LC TDE-LC máximo Interpretação
25<=PRL<100 - 7<=PRL<25 0.487 0.513 irrisório
25<=PRL<100 - PRL<7 0.443 0.557 irrisório
25<=PRL<100 - PRL>100 0.523 0.523 irrisório
7<=PRL<25 - PRL<7 0.456 0.544 irrisório
7<=PRL<25 - PRL>100 0.533 0.533 irrisório
PRL<7 - PRL>100 0.571 0.571 Pequeno
################################################################################
## 5.6 Bias desejável (Critério B.2 - Avaliando significância clínica) #########
################################################################################

base_de_dados_indiv_agrupado <- base_de_dados_indiv %>%
  group_by(Grupo) %>%
  summarise(dp=sd(eixo_y, na.rm = TRUE),
            n=length(eixo_y),
            Media_y = mean(eixo_y),Media_x=mean(eixo_x))

base_de_dados_indiv_agrupado_org<- base_de_dados_indiv_agrupado %>%
  arrange(Media_x)

A_Faixa<-base_de_dados_indiv_agrupado_org$Grupo[1]
B_Faixa<-base_de_dados_indiv_agrupado_org$Grupo[2]
C_Faixa<-base_de_dados_indiv_agrupado_org$Grupo[3]
D_Faixa<-base_de_dados_indiv_agrupado_org$Grupo[4]

A_Faixa_menos_B_Faixa<-paste("'",A_Faixa,"'  -  '",B_Faixa,"'",sep = "")
A_Faixa_menos_C_Faixa<-paste("'",A_Faixa,"'  -  '",C_Faixa,"'",sep = "")
A_Faixa_menos_D_Faixa<-paste("'",A_Faixa,"'  -  '",D_Faixa,"'",sep = "")
B_Faixa_menos_C_Faixa<-paste("'",B_Faixa,"'  -  '",C_Faixa,"'",sep = "")
B_Faixa_menos_D_Faixa<-paste("'",B_Faixa,"'  -  '",D_Faixa,"'",sep = "")
C_Faixa_menos_D_Faixa<-paste("'",C_Faixa,"'  -  '",D_Faixa,"'",sep = "")

Lista_faixas<-c(A_Faixa_menos_B_Faixa,A_Faixa_menos_C_Faixa,
                A_Faixa_menos_D_Faixa,B_Faixa_menos_C_Faixa,
                B_Faixa_menos_D_Faixa,C_Faixa_menos_D_Faixa)

A_media<-round(base_de_dados_indiv_agrupado_org$Media_y[1],3)
B_media<-round(base_de_dados_indiv_agrupado_org$Media_y[2],3)
C_media<-round(base_de_dados_indiv_agrupado_org$Media_y[3],3)
D_media<-round(base_de_dados_indiv_agrupado_org$Media_y[4],3)

A_media_menos_B_media<-paste("|",A_media," - ",B_media,"|",sep = "")
A_media_menos_C_media<-paste("|",A_media," - ",C_media,"|",sep = "")
A_media_menos_D_media<-paste("|",A_media," - ",D_media,"|",sep = "")
B_media_menos_C_media<-paste("|",B_media," - ",C_media,"|",sep = "")
B_media_menos_D_media<-paste("|",B_media," - ",D_media,"|",sep = "")
C_media_menos_D_media<-paste("|",C_media," - ",D_media,"|",sep = "")

Lista_Medias<-c(A_media_menos_B_media,A_media_menos_C_media,
                A_media_menos_D_media,B_media_menos_C_media,
                B_media_menos_D_media,C_media_menos_D_media)

A_media_B_media<-round(abs(A_media-B_media),3)
A_media_B_media<-ifelse(is.na(A_media_B_media)==TRUE,"---",A_media_B_media)

A_media_C_media<-round(abs(A_media-C_media),3)
A_media_C_media<-ifelse(is.na(A_media_C_media)==TRUE,"---",A_media_C_media)

A_media_D_media<-round(abs(A_media-D_media),3)
A_media_D_media<-ifelse(is.na(A_media_D_media)==TRUE,"---",A_media_D_media)

B_media_C_media<-round(abs(B_media-C_media),3)
B_media_C_media<-ifelse(is.na(B_media_C_media)==TRUE,"---",B_media_C_media)
                        
B_media_D_media<-round(abs(B_media-D_media),3)
B_media_D_media<-ifelse(is.na(B_media_D_media)==TRUE,"---",B_media_D_media)

C_media_D_media<-round(abs(C_media-D_media),3)
C_media_D_media<-ifelse(is.na(C_media_D_media)==TRUE,"---",C_media_D_media)

Lista_dif_media<-c(A_media_B_media,A_media_C_media,A_media_D_media,
                   B_media_C_media,B_media_D_media,C_media_D_media)

Tabela21<-data.frame(Faixa_Faixa2=Lista_faixas,
                     Media_Media=Lista_Medias,
                     Dif_Medias=Lista_dif_media)

Tabela21$Bias_absoluto<-bias_permitido

Tabela21 <- data.frame(Tabela21,Interpretacao = "")

Tabela21 <- Tabela21 %>%
  dplyr::mutate(Interpretacao = ifelse(Tabela21$Dif_Medias=="---",
                                       "---",ifelse(Tabela21$Dif_Medias>
                                       Tabela21$Bias_absoluto,
                                       "Diferente","Igual")))

colnames_Tabela21<-c("Diferença entre Faixas",
                    "Diferença entre médias em módulo",
                    "Média das diferenças","Bias absoluto permitido",
                    "Interpretação")

Titulo_Tabela21<-"Tabela 21. Comparação resultado entre faixas Prolactina (Critério B.2 - Avaliando significância clínica) - resultados individuais."

kable(Tabela21,align = "ccc",col.names = colnames_Tabela21,
  caption = Titulo_Tabela21) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%
  column_spec(3:5, color = "black",
  background=ifelse(Tabela21$Dif_Medias=="---","",
                    ifelse(Tabela21$Dif_Medias>Tabela21$Bias_absoluto,
                                "lightpink","palegreen")))
Tabela 21. Comparação resultado entre faixas Prolactina (Critério B.2 - Avaliando significância clínica) - resultados individuais.
Diferença entre Faixas Diferença entre médias em módulo Média das diferenças Bias absoluto permitido Interpretação
‘PRL<7’ - ‘7<=PRL<25’ |2.77 - 2.424| 0.346 0.141 Diferente
‘PRL<7’ - ‘25<=PRL<100’ |2.77 - 2.293| 0.477 0.141 Diferente
‘PRL<7’ - ‘PRL>100’ |2.77 - 2.555| 0.215 0.141 Diferente
‘7<=PRL<25’ - ‘25<=PRL<100’ |2.424 - 2.293| 0.131 0.141 Igual
‘7<=PRL<25’ - ‘PRL>100’ |2.424 - 2.555| 0.131 0.141 Igual
‘25<=PRL<100’ - ‘PRL>100’ |2.293 - 2.555| 0.262 0.141 Diferente


6. Comparação entre faixas (Resultados médios)


################################################################################
################################################################################
########### (6) Comparação entre faixas (Resultados médios) ####################
################################################################################
################################################################################

library(utf8)
set.seed(210002859)

################################################################################
## 6.1 Gráfico Violino #########################################################
################################################################################

base_de_dados_vio2<- base_de_dados 

base_de_dados_vio2$Grupo <- factor(base_de_dados_vio2$Grupo, 
                                  levels = c("PRL<7", 
                                             "7<=PRL<25", 
                                             "25<=PRL<100",
                                             "PRL>100"))

graf_viol2<- ggplot(base_de_dados_vio2, aes(x=Grupo, y=eixo_y)) +
  geom_violin(aes(fill=Grupo), trim=FALSE) +
  stat_summary(fun = median, geom = "crossbar", width = 0.5, color = "black") +
  labs(x=rotulo_eixo_x_médio, y=rotulo_eixo_y_médio) +
  scale_fill_discrete(name="Grupos") +
  scale_y_continuous(n.breaks = 8) +
  theme(axis.line = element_line(colour = "black", linewidth = 0.5),
  axis.title = element_text(size = 14),
  legend.text  = element_text(colour = "black", size = 14),
  axis.text  = element_text(colour = "black", size = 14),
  plot.title = element_text(size = 14, face = "bold"),
  plot.background = element_blank(),
  panel.background = element_blank())+
  guides(fill=FALSE)

graf_viol2

ggsave("4_Figuras/Fig.10_Graf_Violino_Comp_Faixas_PRL_resultados_medio.png",
       plot = graf_viol2, device = "png")
## Saving 7 x 5 in image
################################################################################
## 6.2 Tabela com as médias por faixa ##########################################
################################################################################

Tabela22 <- base_de_dados_vio2 %>%
  group_by(Grupo) %>%
  summarize(Tamanho_Amostral = n(),
            Percentil_25_Desf = quantile(eixo_y, 0.25, na.rm = TRUE),
            Mediana_Desf = median(eixo_y, na.rm = TRUE),
            Percentil_75_Desf = quantile(eixo_y, 0.75, na.rm = TRUE),
            Percentil_25_PRL = quantile(eixo_x, 0.25, na.rm = TRUE),
            Mediana_PRL = median(eixo_x, na.rm = TRUE),
            Percentil_75_PRL = quantile(eixo_x, 0.75, na.rm = TRUE))

colnames(Tabela22)<-c("Faixa de resultados PRL","n",
                      paste("Percentil 25",rotulo_eixo_y1),
                      paste("Mediana",rotulo_eixo_y1),
                      paste("Percentil 75",rotulo_eixo_y1),
                      paste("Percentil 25",rotulo_eixo_x1),
                      paste("Mediana",rotulo_eixo_x1),
                      paste("Percentil 75",rotulo_eixo_x1))

Tabela22_Titulo<-"Tabela 22. Resultados individuais por faixa de concentração - resultados médios."

kable(Tabela22,align = "ccc",caption = Tabela22_Titulo) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White", background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")
Tabela 22. Resultados individuais por faixa de concentração - resultados médios.
Faixa de resultados PRL n Percentil 25 HOMA-IR Mediana HOMA-IR Percentil 75 HOMA-IR Percentil 25 Prolactina Mediana Prolactina Percentil 75 Prolactina
PRL<7 17 2.72300 2.7640 2.97600 4.04900 4.8030 5.80300
7<=PRL<25 60 2.24075 2.3595 2.46150 10.73375 14.4525 18.16125
25<=PRL<100 23 2.16900 2.3040 2.36150 27.94900 32.9690 43.91800
PRL>100 6 2.11725 2.4765 2.81775 118.30875 139.0020 168.58200
################################################################################
## 6.3 Tetes kruskal wallis ####################################################
################################################################################

base_de_dados_kw<-data.frame(Resultado=base_de_dados$eixo_y,
                                   Grupo=base_de_dados$Grupo)
colnames(base_de_dados_kw)[1]<-c("Resultado","Grupo")
## Warning in colnames(base_de_dados_kw)[1] <- c("Resultado", "Grupo"): número de
## itens para para substituir não é um múltiplo do comprimento do substituto
fit.kw2 <- kruskal.test(Resultado ~ Grupo, data = base_de_dados_kw)

chi_squared_kw2<-round(as.numeric(fit.kw2$statistic),3)
df_kw2<-as.numeric(fit.kw2$parameter)
p_kw2<-round(round(fit.kw2$p.value),5)
p_kw2<-ifelse(p_kw2<0.00005,"< 0.00005",p_kw2)

titulo_Tabela23<-"Tabela 23. Teste de Kruskal-Wallis (Critério A - Avaliando a significância estatística) - resultados médios."
resultados_kw2<-c(chi_squared_kw2,df_kw2,p_kw2)
estatisticas_kw2 <- c("Kruskal-Wallis chi-squared","df","p-value")

Tabela23<-data.frame(Estatisticas=estatisticas_kw2,Resultados=resultados_kw2)
colnames_Tabela23 <- c("Estatísticas","Resultados")

kable(Tabela23,col.names = colnames_Tabela23,align = "ccc",
  caption = titulo_Tabela23) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE, color = "White",background = "#1976d2")
Tabela 23. Teste de Kruskal-Wallis (Critério A - Avaliando a significância estatística) - resultados médios.
Estatísticas Resultados
Kruskal-Wallis chi-squared 39.615
df 3
p-value < 0.00005
################################################################################
## 6.4 Tetes posthoc dunn (Critério A - Avaliando significância estatística) ###
################################################################################

teste_posthoc2<-dunn_test(Resultado~Grupo,data =base_de_dados_kw,
                         p.adjust.method = "bonferroni")
estatisticas_kw2 <- c("Kruskal-Wallis chi-squared","df","p-value")

Grupo_1_dunn2<-teste_posthoc2$group1
Grupo_2_dunn2<-teste_posthoc2$group2
n_1_dunn2<-teste_posthoc2$n1
n_2_dunn2<-teste_posthoc2$n2
p.adj_dunn2<-round(teste_posthoc2$p.adj,5)

Tabela24<-data.frame(Grupo_1=Grupo_1_dunn2,
                     Grupo_2=Grupo_2_dunn2,
                     n1=n_1_dunn2,n2=n_2_dunn2,
                     p.adj=p.adj_dunn2)

Tabela24 <- Tabela24 %>%
  dplyr::mutate(Interpretacao = ifelse(Tabela24$p.adj<0.005,
                                "Diferente","Igual"))

colnames_Tabela24 <- c("Grupo 1","Grupo 2","n1","n2",
                   "p-valor ajustado","Interpretação")

titulo_Tabela24<-"Tabela 24. Teste Post hoc de Dunn (Critério A - Avaliando a significância estatística, alfa = 0.005) - resultados médios."

kable(Tabela24,col.names = colnames_Tabela24,align = "ccc",
  caption = titulo_Tabela24) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%
  column_spec(2,bold = TRUE,color = "White",background = "#1976d2") %>%
  column_spec(5:6, color = "black",background=ifelse(Tabela24$p.adj<0.005,
                                "lightpink","palegreen"))
Tabela 24. Teste Post hoc de Dunn (Critério A - Avaliando a significância estatística, alfa = 0.005) - resultados médios.
Grupo 1 Grupo 2 n1 n2 p-valor ajustado Interpretação
25<=PRL<100 7<=PRL<25 23 60 1.00000 Igual
25<=PRL<100 PRL<7 23 17 0.00000 Diferente
25<=PRL<100 PRL>100 23 6 1.00000 Igual
7<=PRL<25 PRL<7 60 17 0.00000 Diferente
7<=PRL<25 PRL>100 60 6 1.00000 Igual
PRL<7 PRL>100 17 6 0.02513 Igual
################################################################################
## 6.5 Tamanaho efeito (Critério B.1 - Avaliando a relevância prática) #########
################################################################################

TDE_epsilon22<-epsilonSquared(x=base_de_dados_kw$Resultado,
                         g=base_de_dados_kw$Grupo,ci=T)
TDE_epsilon22<-as.numeric(TDE_epsilon22)


TDE_epsilon22_valor<-TDE_epsilon22[1]
TDE_epsilon22_LI_IC95<-TDE_epsilon22[2]
TDE_epsilon22_LS_IC95<-TDE_epsilon22[3]

Interpretacao_TDE_epsilon22<-ifelse(TDE_epsilon22_valor<0.01,"irrisório",
                             ifelse(TDE_epsilon22_valor>=0.01 &
                                   TDE_epsilon22_valor<0.08,"Pequeno",
                             ifelse(TDE_epsilon22_valor>=0.08 &
                                   TDE_epsilon22_valor<0.26,"Moderado",
                             ifelse(TDE_epsilon22_valor>=0.26,"Grande","---"))))

Tabela25<-data.frame(Metodo="epsilon2 ordinal",
                     TDE=TDE_epsilon22_valor,
                     LI_TDE=TDE_epsilon22_LI_IC95,
                     LS_TDE=TDE_epsilon22_LS_IC95,
                     Interpretacao=Interpretacao_TDE_epsilon22) 

colnames_Tabela25<-c("Método","TDE",
                     "Limite Inferior IC 95%",
                     "Limite Superior IC 95%",
                     "Interpretação")

titulo_Tabela25<-"Tabela 25. Tamanho de efeito (TDE) do Grupo (Critério B.1 - Avaliando a significância prática) - resultados médios."

kable(Tabela25,col.names = colnames_Tabela25,align = "ccc",
  caption = titulo_Tabela25) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1")
Tabela 25. Tamanho de efeito (TDE) do Grupo (Critério B.1 - Avaliando a significância prática) - resultados médios.
Método TDE Limite Inferior IC 95% Limite Superior IC 95% Interpretação
epsilon2 ordinal 0.377 0.264 0.548 Grande
multiVDA<-rcompanion::multiVDA(Resultado~Grupo,data = base_de_dados_kw)

Tabela26<-data.frame(multiVDA$pairs)

Tabela26<-data.frame(Comparacao=Tabela26$Comparison,
                     Resultados=Tabela26$VDA,
                     Resultados_max=Tabela26$VDA.m)

Tabela26 <- data.frame(Tabela26,Interpretacao = "")

Tabela26 <- Tabela26 %>%
  dplyr::mutate(Interpretacao = ifelse(Tabela26$Resultados_max<0.56,"irrisório",
                                ifelse(Tabela26$Resultados_max>=0.56 &
                                Tabela26$Resultados_max<0.64,"Pequeno",
                                ifelse(Tabela26$Resultados_max>=0.64 &
                                Tabela26$Resultados_max<0.71,"Moderado",
                                ifelse(Tabela26$Resultados_max>=0.71,"Grande","---")))))

colnames_Tabela26 <- c("Comparação","TDE-LC","TDE-LC máximo","Interpretação")

titulo_Tabela26<-"Tabela 26. Tamanho de efeito Linguagem Comum (TDE-LC) pelo método Vargha-Delaney A (Critério B.1 - Avaliando a significância prática) - resultados médios."

kable(Tabela26,col.names = colnames_Tabela26,align = "ccc",
  caption = titulo_Tabela26) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White", background = "#0d47a1")
Tabela 26. Tamanho de efeito Linguagem Comum (TDE-LC) pelo método Vargha-Delaney A (Critério B.1 - Avaliando a significância prática) - resultados médios.
Comparação TDE-LC TDE-LC máximo Interpretação
25<=PRL<100 - 7<=PRL<25 0.39500 0.60500 Pequeno
25<=PRL<100 - PRL<7 0.00256 0.99744 Grande
25<=PRL<100 - PRL>100 0.44200 0.55800 irrisório
7<=PRL<25 - PRL<7 0.00490 0.99510 Grande
7<=PRL<25 - PRL>100 0.48100 0.51900 irrisório
PRL<7 - PRL>100 0.63700 0.63700 Pequeno
################################################################################
## 6.6 Bias desejável (Critério B.2 - Avaliando significância clínica) #########
################################################################################

base_de_dados_agrupado <- base_de_dados %>%
  group_by(Grupo) %>%
  summarise(ep=sd(eixo_y, na.rm = TRUE)/sqrt(length(eixo_y)),
            n=length(eixo_y),
            Media_y = mean(eixo_y),Media_x=mean(eixo_x))

base_de_dados_agrupado_org<- base_de_dados_agrupado %>%
  arrange(Media_x)

A_Faixa<-base_de_dados_agrupado_org$Grupo[1]
B_Faixa<-base_de_dados_agrupado_org$Grupo[2]
C_Faixa<-base_de_dados_agrupado_org$Grupo[3]
D_Faixa<-base_de_dados_agrupado_org$Grupo[4]

A_Faixa_menos_B_Faixa<-paste("'",A_Faixa,"'  -  '",B_Faixa,"'",sep = "")
A_Faixa_menos_C_Faixa<-paste("'",A_Faixa,"'  -  '",C_Faixa,"'",sep = "")
A_Faixa_menos_D_Faixa<-paste("'",A_Faixa,"'  -  '",D_Faixa,"'",sep = "")
B_Faixa_menos_C_Faixa<-paste("'",B_Faixa,"'  -  '",C_Faixa,"'",sep = "")
B_Faixa_menos_D_Faixa<-paste("'",B_Faixa,"'  -  '",D_Faixa,"'",sep = "")
C_Faixa_menos_D_Faixa<-paste("'",C_Faixa,"'  -  '",D_Faixa,"'",sep = "")

Lista_faixas<-c(A_Faixa_menos_B_Faixa,A_Faixa_menos_C_Faixa,
                A_Faixa_menos_D_Faixa,B_Faixa_menos_C_Faixa,
                B_Faixa_menos_D_Faixa,C_Faixa_menos_D_Faixa)

A_media<-round(base_de_dados_agrupado_org$Media_y[1],3)
B_media<-round(base_de_dados_agrupado_org$Media_y[2],3)
C_media<-round(base_de_dados_agrupado_org$Media_y[3],3)
D_media<-round(base_de_dados_agrupado_org$Media_y[4],3)

A_media_menos_B_media<-paste("|",A_media," - ",B_media,"|",sep = "")
A_media_menos_C_media<-paste("|",A_media," - ",C_media,"|",sep = "")
A_media_menos_D_media<-paste("|",A_media," - ",D_media,"|",sep = "")
B_media_menos_C_media<-paste("|",B_media," - ",C_media,"|",sep = "")
B_media_menos_D_media<-paste("|",B_media," - ",D_media,"|",sep = "")
C_media_menos_D_media<-paste("|",C_media," - ",D_media,"|",sep = "")

Lista_Medias<-c(A_media_menos_B_media,A_media_menos_C_media,
                A_media_menos_D_media,B_media_menos_C_media,
                B_media_menos_D_media,C_media_menos_D_media)

A_media_B_media2<-round(abs(A_media-B_media),3)
A_media_B_media2<-ifelse(is.na(A_media_B_media2)==TRUE,"---",A_media_B_media2)

A_media_C_media2<-round(abs(A_media-C_media),3)
A_media_C_media2<-ifelse(is.na(A_media_C_media2)==TRUE,"---",A_media_C_media2)

A_media_D_media2<-round(abs(A_media-D_media),3)
A_media_D_media2<-ifelse(is.na(A_media_D_media2)==TRUE,"---",A_media_D_media2)

B_media_C_media2<-round(abs(B_media-C_media),3)
B_media_C_media2<-ifelse(is.na(B_media_C_media2)==TRUE,"---",B_media_C_media2)

B_media_D_media2<-round(abs(B_media-D_media),3)
B_media_D_media2<-ifelse(is.na(B_media_D_media2)==TRUE,"---",B_media_D_media2)

C_media_D_media2<-round(abs(C_media-D_media),3)
C_media_D_media2<-ifelse(is.na(C_media_D_media2)==TRUE,"---",C_media_D_media2)

Lista_dif_media<-c(A_media_B_media2,A_media_C_media2,A_media_D_media2,
                   B_media_C_media2,B_media_D_media2,C_media_D_media2)

Tabela27<-data.frame(Faixa_Faixa2=Lista_faixas,
                     Media_Media=Lista_Medias,
                     Dif_Medias=Lista_dif_media)

Tabela27$Bias_absoluto<-bias_permitido

Tabela27 <- data.frame(Tabela27,Interpretacao = "")

Tabela27 <- Tabela27 %>%
  dplyr::mutate(Interpretacao = ifelse(Tabela27$Dif_Medias=="---",
                                       "---",ifelse(Tabela27$Dif_Medias>
                                       Tabela27$Bias_absoluto,
                                       "Diferente","Igual")))

colnames_Tabela27<-c("Diferença entre Faixas",
                  "Diferença entre médias em módulo",
                  "Média das diferenças","Bias absoluto permitido",
                   "Interpretação")

Titulo_Tabela27<-"Tabela 27. Comparação resultado entre faixas Prolactina (Critério B.2 - Avaliando significância clínica) - resultados médios."

kable(Tabela27,align = "ccc",col.names = colnames_Tabela27,
  caption = Titulo_Tabela27) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2") %>%
  column_spec(3:5, color = "black",
  background=ifelse(Tabela27$Dif_Medias=="---","",
                    ifelse(Tabela27$Dif_Medias>Tabela27$Bias_absoluto,
                                "lightpink","palegreen")))
Tabela 27. Comparação resultado entre faixas Prolactina (Critério B.2 - Avaliando significância clínica) - resultados médios.
Diferença entre Faixas Diferença entre médias em módulo Média das diferenças Bias absoluto permitido Interpretação
‘PRL<7’ - ‘7<=PRL<25’ |2.835 - 2.358| 0.477 0.141 Diferente
‘PRL<7’ - ‘25<=PRL<100’ |2.835 - 2.307| 0.528 0.141 Diferente
‘PRL<7’ - ‘PRL>100’ |2.835 - 2.52| 0.315 0.141 Diferente
‘7<=PRL<25’ - ‘25<=PRL<100’ |2.358 - 2.307| 0.051 0.141 Igual
‘7<=PRL<25’ - ‘PRL>100’ |2.358 - 2.52| 0.162 0.141 Diferente
‘25<=PRL<100’ - ‘PRL>100’ |2.307 - 2.52| 0.213 0.141 Diferente


7. Regressão Segmentada e “Ponto Inflexão”


################################################################################
################################################################################
############### (7) Regressão Segmentada e "Ponto Inflexão" ####################
################################################################################
################################################################################

library(utf8)
set.seed(210002859)

################################################################################
## 7.1 Treinando o Modelo, Changepoint e Equação da Regressão Linear ###########
################################################################################

################################################################################
### 7.1.1 Treinando o modelo ___________________________________________________
################################################################################

fit.lm<-lm(eixo_y ~ eixo_x,data = train_data)

fit.segmented<-segmented(fit.lm,data = train_data,
               npsi = 1,control = seg.control(display = FALSE))

################################################################################
### 7.1.2 Identificando do Ponto de Inflexão ___________________________________
################################################################################

Changepoint<-round(confint.segmented(fit.segmented)[1,1],2)

LI.IC95.Changepoint<-round(confint.segmented(fit.segmented,method="delta")[1,2],2)
LS.IC95.Changepoint<-round(confint.segmented(fit.segmented,method="delta")[1,3],2)

IC95.Changepoint<-paste("IC 95%: ",LI.IC95.Changepoint," a ",
                        LS.IC95.Changepoint,sep = "")
Texto.Changepoint.e.IC95<-paste("Ponto de inflexão: ",Changepoint,"; ",
                                IC95.Changepoint,sep = "")

################################################################################
### 7.1.3 Equação de Regressão para valores <= ao Ponto de Inflexão ____________
################################################################################

subset.below<-subset(base_de_dados, eixo_x <= Changepoint)

fit.below<-lm(eixo_y ~ eixo_x, data = subset.below)

eq.below<-paste(rotulo_eixo_y1,"=",
          round(coef(fit.below)[1], 4), "+", 
          round(coef(fit.below)[2], 4), "x", rotulo_eixo_x1)

r.pearson.below<-cor.test(subset.below$eixo_x,subset.below$eixo_y)

r.below<-round(r.pearson.below$estimate,2)

r.below.IC95<-paste(round(r.pearson.below$conf.int[1],2)," a ",
                    round(r.pearson.below$conf.int[2],2))

p.valor.r.below<-round(r.pearson.below$p.value,8)



Texto.eq.below<-paste("Eq Reg. linear <= à Broken-line: ",eq.below,
                      " (r: ",r.below,
                      ";IC 95%: ",r.below.IC95,
                      "; p-valor:",p.valor.r.below,")",
                      sep = "")
R2.below<-r.below^2

################################################################################
### 7.1.4 Equação de Regressão para valores > do que Ponto de Inflexão _________
################################################################################


subset.above<-subset(base_de_dados, eixo_x > Changepoint)

fit.above<-lm(eixo_y ~ eixo_x, data = subset.above)

eq.above<-paste(rotulo_eixo_y1,"=",
          round(coef(fit.above)[1], 4), "+", 
          round(coef(fit.above)[2], 4), "x", rotulo_eixo_x1)

r.pearson.above<-cor.test(subset.above$eixo_x,subset.above$eixo_y)

r.above<-round(r.pearson.above$estimate,2)

r.above.IC95<-paste(round(r.pearson.above$conf.int[1],2)," a ",
                    round(r.pearson.above$conf.int[2],2))

p.valor.r.above<-round(r.pearson.above$p.value,8)



Texto.eq.above<-paste("Eq Reg. linear <= à Broken-line: ",eq.above,
                      " (r: ",r.above,
                      ";IC 95%: ",r.above.IC95,
                      "; p-valor:",p.valor.r.above,")",
                      sep = "")

R2.above<-r.above^2

################################################################################
### 7.2 Gráfico Regressão Segmentada ###########################################
################################################################################

Comp.Regressao.Seg <- ggplot(base_de_dados, aes(eixo_x, eixo_y)) + 
  geom_point(size = 4.5, color = "black", fill = NA) +
  geom_point(aes(color = ifelse(eixo_x <= Changepoint, "#00AFBB", 
  "#FC4E07")), size = 4) +
  scale_color_manual(values = c("#00AFBB", "#FC4E07")) +
  geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 1,
  data = subset.below) +
  geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 1,
  data = subset.above) +
  geom_vline(xintercept = Changepoint,color = "red", 
  linetype = "dashed",linewidth  = 1) +
  annotate("rect",xmin=LI.IC95.Changepoint,xmax=LS.IC95.Changepoint,
  ymin=-Inf,ymax=Inf,alpha=0.15, fill="red") +
  geom_text(x = Changepoint+65, y = 0.99*max(base_de_dados$eixo_y), 
  label = Texto.Changepoint.e.IC95,size = 4, color = "black") +
  ggtitle("") + 
  xlab(rotulo_eixo_x_médio) + 
  ylab(rotulo_eixo_y_médio) +
  scale_y_continuous(n.breaks=8) +
  scale_x_continuous(n.breaks=10) +
  theme(axis.line = element_line(colour = "black", size = 0.5),
  axis.title = element_text(size = 14),
  axis.text  = element_text (colour ="black",size = 14),
  plot.title=element_text(size=14,face = "bold"),
  plot.background = element_blank(),
  panel.background = element_blank(),
  legend.position = "none")


Comp.Regressao.Seg
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Comp.Regressao.Seg2<-ggplotly(Comp.Regressao.Seg)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Comp.Regressao.Seg2
ggsave("4_Figuras/Fig.11_Regressão_Segmentada_e_Broken_line.png",
       plot = Comp.Regressao.Seg, device = "png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
################################################################################
### 7.3 Tabela com resultados do gráfico #######################################
################################################################################

library(utf8)

Tabela28<-data.frame(Resultados=c(Texto.Changepoint.e.IC95,Texto.eq.below,Texto.eq.above))
colnames_Tabela28 <- "Resultados da Regressão segmentada"
colnames_Tabela28 <- enc2utf8(colnames_Tabela28)
Tabela28_Titulo<-"Tabela 28. Equação regressão e Ponto de Inflexão."
Tabela28.Nota1<-"O 'Ponto de Inflexão' usada para segmentação dos dados foi o limmite inferior do Intervalo de Confinaça de 95% do 'Break-point'."

kable(Tabela28,col.names = colnames_Tabela28, align = "ccc",
  caption = Tabela28_Titulo) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE, color = "White",background = "#0d47a1")%>%
   footnote(general_title = "Nota de rodapé:", general = Tabela28.Nota1)
Tabela 28. Equação regressão e Ponto de Inflexão.
Resultados da Regressão segmentada
Ponto de inflexão: 11.23; IC 95%: 9.58 a 12.87
Eq Reg. linear <= à Broken-line: HOMA-IR = 3.2604 + -0.0859 x Prolactina (r: -0.93;IC 95%: -0.97 a -0.87; p-valor:0)
Eq Reg. linear <= à Broken-line: HOMA-IR = 2.2603 + 0.0018 x Prolactina (r: 0.36;IC 95%: 0.14 a 0.55; p-valor:0.00192933)
Nota de rodapé:
O ‘Ponto de Inflexão’ usada para segmentação dos dados foi o limmite inferior do Intervalo de Confinaça de 95% do ‘Break-point’.
################################################################################
## 7.4 Selecionando o subset linear ############################################
################################################################################

subset.select<-function(R2.below,R2.above) {
  if (R2.below>R2.above) {return (subset.below)}
  else if (R2.below<R2.above){return (subset.above)}
  else {return (if (nrow(subset.below)>nrow(subset.above)){return (subset.below)}
            else {return (subset.above)})}
}
subset.select<-subset.select(R2.below = R2.below,R2.above = R2.above)  

################################################################################
## 7.5 Dividindo o subset em treino e teste ####################################
################################################################################

sub_amostra_treino <- sample(nrow(subset.select), replace=FALSE, 
                         size=train_prop*nrow(subset.select))
subset.train_data <- subset.select[sub_amostra_treino, ]
subset.test_data <- subset.select[-sub_amostra_treino, ]

################################################################################
## 7.6 Verificando se o modelo treinado overfita os dados ######################
###################### (Trecho linear) #########################################
################################################################################

yhat_treino_Reg.Seg <-predict.segmented(fit.segmented,newdata=subset.train_data,
                                        interval="confidence")
Desempenho_treino_Reg.Seg<-data.frame(Desempenho=
                           defaultSummary(data.frame(obs=subset.train_data$eixo_y,
                           pred=yhat_treino_Reg.Seg[,1])))
Desempenho_treino_Reg.Seg<-round(Desempenho_treino_Reg.Seg,2)

################################################################################
## 7.7 Verificando a capacidade de Generalização do modelo #####################
###################### (Trecho linear) #########################################
################################################################################

yhat_teste_Reg.Seg <-predict.segmented(fit.segmented,newdata=subset.test_data,
                                       interval="confidence")
Desempenho_teste_Reg.Seg<-data.frame(Desempenho=
                           defaultSummary(data.frame(obs=subset.test_data$eixo_y,
                           pred=yhat_teste_Reg.Seg[,1])))
Desempenho_teste_Reg.Seg<-round(Desempenho_teste_Reg.Seg,2)

RMSE_reg.seg<-Desempenho_teste_Reg.Seg$Desempenho[1]
R2_reg.seg<-Desempenho_teste_Reg.Seg$Desempenho[2]
MAE_reg.seg<-Desempenho_teste_Reg.Seg$Desempenho[3]

Generalização_reg.seg<-paste("RMSE: ",RMSE_reg.seg,"; ",
                             "R2: ",R2_reg.seg,"; ",
                             "MAE:",MAE_reg.seg,sep = "")

################################################################################
## 7.8 Tabela - Avaliando do desempenho do modelo ##############################
################################################################################

metricas<-c("Raiz Quadrática Média dos Erros (RMSE)",
            "Coeficiente de Determinação (Rsquared)",
            "Erro Médio Absoluto (MAE)")

Tabela29<- data.frame(Metricas=metricas,
                     Desempenho_treino=Desempenho_treino_Reg.Seg$Desempenho,
                     Desempenho_teste=Desempenho_teste_Reg.Seg$Desempenho)

colnames_Tabela29 <- c("Métricas", "Desempenho dados treino",
                       "Desempenho dados teste")

Tabela29_Titulo<-"Tabela 29. Comparando o desempenho nos dados de treino e teste - Verificando a capacidade de generalização do modelo de Regressão Segmentada."
Tabela29.Nota1<-"Capacidade Preditiva utilizando o modelo de regressão segmentada e utilizando todo o dataset."

kable(Tabela29,col.names = colnames_Tabela29,
      align = "ccc",caption = Tabela29_Titulo) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")%>%
  footnote(general_title = "Capacidade de Generalização baseada na Regressão Segmentada:", 
  general = Tabela29.Nota1)
Tabela 29. Comparando o desempenho nos dados de treino e teste - Verificando a capacidade de generalização do modelo de Regressão Segmentada.
Métricas Desempenho dados treino Desempenho dados teste
Raiz Quadrática Média dos Erros (RMSE) 0.09 0.07
Coeficiente de Determinação (Rsquared) 0.87 0.91
Erro Médio Absoluto (MAE) 0.07 0.05
Capacidade de Generalização baseada na Regressão Segmentada:
Capacidade Preditiva utilizando o modelo de regressão segmentada e utilizando todo o dataset.


8. Splines de Regressão Adaptativa Multivariada


## Saving 7 x 5 in image
Tabela 30. Comparando o desempenho nos dados de treino e teste (Verificando a capacidade de generalização).
Métricas Desempenho dados treino Desempenho dados teste
Raiz Quadrática Média dos Erros (RMSE) 0.07 0.07
Coeficiente de Determinação (Rsquared) 0.92 0.89
Erro Médio Absoluto (MAE) 0.05 0.06
Capacidade de Generalização baseada na Splines de Regressão Adaptativa Multivariada (MARS):
Capacidade Preditiva utilizando o modelo MARS e utilizando apenas o subset com maior linearidade.


9. Previsoes e Desempenho


################################################################################
################################################################################
############# (9) Previsões da variável desfecho com base  #####################
############# no resultado do Limite Innferior do ChangePoint ##################
################################################################################
################################################################################

library(utf8)
set.seed(210002859)

################################################################################
## 9.1 Regressão Segmentada ####################################################
################################################################################

yhat_Reg.Seg_Changepoint <-predict.segmented(fit.segmented,
                           newdata=data.frame(eixo_x = LI.IC95.Changepoint),
                           interval="confidence")

direção<-ifelse(r.below<0,">=","<=")

Interpr_yhat_Reg.Seg_Changepoint<-paste(rotulo_eixo_y,direção,
                        round(yhat_Reg.Seg_Changepoint[1,1],2),"(IC95%:",
                        round(yhat_Reg.Seg_Changepoint[1,2],2),"a",
                        round(yhat_Reg.Seg_Changepoint[1,3],2),")")

################################################################################
## 9.2 Splines de Regressão Adaptativa Multivariada ############################
################################################################################

yhat_MARS_Changepoint<-predict(fit.MARS2,
                        newdata=data.frame(eixo_x = LI.IC95.Changepoint))

yhat_Centro_MARS_Changepoint<-round(yhat_MARS_Changepoint,2)    

Interpretação_fit.MARS_Predição<-paste(rotulo_eixo_y,direção,
                                 yhat_Centro_MARS_Changepoint)                             
                             
################################################################################
## 9.3 Tabela - Avaliando do desempenho do modelo ##############################
################################################################################

Modelo_previsao<-c("Regressão Segmentada",
                   "Splines de Regressão Adaptativa Multivariada")

Previsao<-c(Interpr_yhat_Reg.Seg_Changepoint,
            Interpretação_fit.MARS_Predição)

Capacidade_genetalizacao<-c(Generalização_reg.seg,
                            Generalização_MARS)

Tabela31<- data.frame(Modelo_Previsao =Modelo_previsao,
                     Previsao=Previsao,
                     Generalizacao=Capacidade_genetalizacao)

Previsão<-paste("Previsão resultados de ",rotulo_eixo_y1,sep = "")
colnames_Tabela31 <- c("Modelos usados para Previsão",
               Previsão,"Métricas avaliação (Capacidade Generalização)")

Tabela31_Titulo<-"Tabela 31. Comparando o desempenho nos dados de treino e teste (Verificando a capacidade de generalização)."

Tabela31.Nota1<-"Capacidade Preditiva utilizando o modelo de regressão segmentada e utilizando todo o dataset."

Tabela31.Nota2<-"Capacidade Preditiva utilizando o modelo MARS e utilizando apenas o subset com maior linearidade."

kable(Tabela31,col.names = colnames_Tabela31,align = "ccc",
  caption = Tabela31_Titulo) %>%
  kable_styling(full_width = FALSE,
  bootstrap_options = c("striped","hover","condensed","responsive")) %>%
  row_spec(0,bold = TRUE,color = "White",background = "#0d47a1") %>%
  column_spec(1,bold = TRUE,color = "White",background = "#1976d2")%>%
  footnote(general_title = "Capacidade de Generalização baseada na Splines de Regressão Adaptativa Multivariada (MARS):", general = Tabela31.Nota2) %>%
  footnote(general_title = "Capacidade de Generalização baseada na Regressão Segmentada:", 
  general = Tabela31.Nota1)
Tabela 31. Comparando o desempenho nos dados de treino e teste (Verificando a capacidade de generalização).
Modelos usados para Previsão Previsão resultados de HOMA-IR Métricas avaliação (Capacidade Generalização)
Regressão Segmentada HOMA-IR >= 2.43 (IC95%: 2.33 a 2.53 ) RMSE: 0.07; R2: 0.91; MAE:0.05
Splines de Regressão Adaptativa Multivariada HOMA-IR >= 2.46 RMSE: 0.07; R2: 0.89; MAE:0.06
Capacidade de Generalização baseada na Splines de Regressão Adaptativa Multivariada (MARS):
Capacidade Preditiva utilizando o modelo MARS e utilizando apenas o subset com maior linearidade.
Capacidade de Generalização baseada na Regressão Segmentada:
Capacidade Preditiva utilizando o modelo de regressão segmentada e utilizando todo o dataset.